Coverage for tests/test_datastore.py: 11% (1009 statements)
coverage.py v7.2.7, created at 2023-06-23 09:30 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable
from typing import Any, cast

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    StorageClass,
    StorageClassFactory,
    StoredFileInfo,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    """Make an example metrics object, optionally with `None` for the array."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
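
# For reference, makeExampleMetrics() fills its MetricsExample positionally;
# judging from the keyword form used in test_simple_class_put_get below, the
# three arguments correspond to the ``summary``, ``output`` and ``data``
# components.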


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing"""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage Classes are fixed for all datastores in these tests.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (we should not assume the class name here;
        # the configuration file itself is authoritative).
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False
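
    # Concrete subclasses are additionally expected to define ``configFile``,
    # ``uriScheme``, ``ingestTransferModes`` and ``canIngestNoTransferAuto``,
    # all of which the tests below rely on (see PosixDatastoreTestCase for an
    # example).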

    def testConfigRoot(self) -> None:
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self) -> None:
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self) -> None:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # Make sure that using getManyURIs without prediction before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with prediction before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with the parent ID.
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        # Keep the last storage class from the loop for reuse below.
        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None.
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V", "band": "v"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported.
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise.
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # Non-existent file.
            datastore.get(ref)

        # Get a URI from it.
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that registry knows nothing about."""

        datastore = self.makeDatastore()

        # Skip the test if the attribute is not defined.
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(
            ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"), start=1
        ):
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            disassembled = sc_name == "StructuredComposite"

            # Start the datastore in its default configuration of using
            # the registry.
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly.
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry so that we are now trusting.
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break.
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI requests should fail unless we ask for prediction.
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode.
            datastore.trustGetRequest = True

            # Try again to get it.
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component.
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust, this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for a compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make a new dataset ref with a compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with the original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self) -> None:
        """Test disassembly within the datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # The in-memory datastore does not disassemble.
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble, to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore.
        datastore = self.makeDatastore()

        # Dummy dataId.
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes.
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with a read parameter.
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component.
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read-only
                # component.
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore | DatasetRef, ...]:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
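
    # prepDeleteTest returns a flattened tuple, so callers unpack it directly:
    # ``datastore, ref = self.prepDeleteTest()`` or
    # ``datastore, *refs = self.prepDeleteTest(n_refs=10)`` as in
    # TrashDatastoreTestCase below.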

    def testRemove(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail.
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once.
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail.
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op.
        datastore.forget([ref])

        # The predicted URI should still point to the file.
        self.assertTrue(uri.exists())
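
    # Taken together, these tests exercise the three deletion flavors:
    # remove() deletes both the artifact and its record, forget() drops only
    # the record (the file at the predicted URI survives, as asserted above),
    # while trash()/emptyTrash() defer the actual deletion (see
    # TrashDatastoreTestCase below).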

    def testTransfer(self) -> None:
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"}
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist.
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not.
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist.
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist.
        for ref, _ in fail:
            # These should raise.
            with self.assertRaises(FileNotFoundError):
                # Non-existent file.
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V", "band": "v"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V", "band": "v"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist.
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist.
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone.
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(
        self, func: Callable[[MetricsExample, str, DatasetRef], None], expectOutput: bool = True
    ) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False, but then the new symlink will fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have "auto" but can't do in-place transfer.
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Ingest a file already in the datastore root."""
                    # First move it into the root, and adjust the path
                    # accordingly.
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files outside of the datastore root unless
                    using auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # the datastore for auto mode.
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of
                    # mode.
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store.
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for datastore_name in datastore.names:
            if not datastore_name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self) -> None:
        """Test the export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete
            # set of records.
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works; include a non-existing dataset
                # ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import a relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None.
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with a dict()
        constructor."""
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))
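
    # The "A"/"B" suffixes above select paired test storage classes (e.g.
    # MetricsExampleA and MetricsExampleB), presumably defined in the
    # config/basic/storageClasses.yaml loaded in setUpClass; the point of the
    # final assertion is that two differently-formatted puts read back equal.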


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self) -> None:
        """Test that we write to a temporary file and then rename."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self) -> None:
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location."""

        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations.
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId)

        def raiser(ref: DatasetRef) -> None:
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # Verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # Verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self) -> None:
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # The configuration should have disabled checksum calculation.
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly.
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash tests to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self) -> None:
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet.
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record; trash should then do nothing.
        # This is the execution-butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiple refs at once in trust mode.
        subset = [refs.pop() for _ in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed.


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self) -> None:
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension).
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}?")

        # Try two failing formatters, one of which leaves a partial file
        # behind.
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey-patch the formatter.
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset; it should fail.
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk.
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory.
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written.
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self) -> None:
        """Test the constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose a different temp file depending on the StorageClass.
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest.
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest.
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self) -> None:
        """Test the chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose a different temp file depending on the StorageClass.
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore.
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works.
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore.
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores mean InMemory at the
                            # moment, and those do not accept ingest of
                            # files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest.
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self) -> None:
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files.
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        # Create a list of refs and a list of temporary files.
        n_datasets = 10
        self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self) -> None:
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory.
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary.
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self) -> None:
        """Use default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self) -> None:
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # The environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # No default setting.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        # This environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set, the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # Use the API to set the environment variable, making sure that the
        # variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))
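
    # Precedence exercised above, from highest to lowest:
    # DAF_BUTLER_CACHE_DIRECTORY, then an explicit ``root`` in the config,
    # then DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET (or the equivalent
    # set_fallback_cache_directory_if_unset() API).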

    def testExplicitCacheDir(self) -> None:
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary.
        self.assertFalse(cache_manager.cache_directory.isTemporary)
    def assertCache(self, cache_manager: DatastoreCacheManager) -> None:
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # The cached file should no longer exist at its original location
        # but the uncached file should be unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in the cache.
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

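    # assertCache walks the full lifecycle of the cache manager API used
    # in these tests.  In sketch form (hedged; only the calls made above
    # are assumed):
    #
    #     if cache_manager.should_be_cached(ref):
    #         cache_manager.move_to_cache(uri, ref)  # consumes the file at uri
    #     if cache_manager.known_to_cache(ref, extension):
    #         with cache_manager.find_in_cache(ref, extension) as found:
    #             ...  # found is only guaranteed to exist inside this block
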
    def testNoCache(self) -> None:
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

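    # DatastoreDisabledCacheManager implements the same interface with
    # every operation a no-op, so datastore code can hold a cache manager
    # unconditionally instead of branching on whether caching is enabled.
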
    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """

    def testCacheExpiryFiles(self) -> None:
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        # Check that an empty cache returns unknown for an arbitrary ref.
        self.assertFalse(cache_manager.known_to_cache(self.refs[0]))

        # Should end with datasets: 2, 3, 4.
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5.
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the same file count as before.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add an additional entry to the cache.
            # Should now have: 4, 5, 6.
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside the context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory, but because hardlinks
        # are used it was deleted from the main cache during the expiry
        # above, so it should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache triggers another expiry;
        # dataset 2 should still not be found.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

    def testCacheExpiryDatasets(self) -> None:
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self) -> None:
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
                self.assertTrue(cache_manager.known_to_cache(component_ref))
                self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef()))
                self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension()))

        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files.

        # Write two new non-composite datasets; the file count should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self) -> None:
        threshold = 55  # Each file is 10 bytes.
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(
        self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int
    ) -> None:
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)

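    # Worked example: with n_datasets=5 and n_retained=3 (the "files"
    # test above), the retention condition is i >= 5 - 3, so refs 2, 3
    # and 4 are still found while refs 0 and 1 have been expired.
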
    def testCacheExpiryAge(self) -> None:
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep past the threshold, then insert 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting.
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)

class DatasetRefURIsTestCase(unittest.TestCase):
    """Tests for DatasetRefURIs."""

    def testSequenceAccess(self) -> None:
        """Verify that DatasetRefURIs can be treated like a two-item tuple."""
        uris = DatasetRefURIs()

        self.assertEqual(len(uris), 2)
        self.assertIsNone(uris[0])
        self.assertEqual(uris[1], {})

        primaryURI = ResourcePath("1/2/3")
        componentURI = ResourcePath("a/b/c")

        # Affirm that DatasetRefURIs does not support MutableSequence functions.
        with self.assertRaises(TypeError):
            uris[0] = primaryURI
        with self.assertRaises(TypeError):
            uris[1] = {"foo": componentURI}

        # But DatasetRefURIs can be set by property name:
        uris.primaryURI = primaryURI
        uris.componentURIs = {"foo": componentURI}
        self.assertEqual(uris.primaryURI, primaryURI)
        self.assertEqual(uris[0], primaryURI)

        primary, components = uris
        self.assertEqual(primary, primaryURI)
        self.assertEqual(components, {"foo": componentURI})

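    # Because DatasetRefURIs unpacks like a two-item tuple, calling code
    # can destructure it directly (a hedged sketch; the datastore method
    # name is assumed, not exercised in this test):
    #
    #     primary, components = datastore.getURIs(ref)
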
    def testRepr(self) -> None:
        """Verify __repr__ output."""
        uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")})
        self.assertEqual(
            repr(uris),
            'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})',
        )

class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase):
    storageClassFactory = StorageClassFactory()

    def test_StoredFileInfo(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})

        record = dict(
            storage_class="StructuredDataDict",
            formatter="lsst.daf.butler.Formatter",
            path="a/b/c.txt",
            component="component",
            dataset_id=ref.id,
            checksum=None,
            file_size=5,
        )
        info = StoredFileInfo.from_record(record)

        self.assertEqual(info.dataset_id, ref.id)
        self.assertEqual(info.to_record(), record)

        ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})
        rebased = info.rebase(ref2)
        self.assertEqual(rebased.dataset_id, ref2.id)
        self.assertEqual(rebased.rebase(ref), info)

        with self.assertRaises(TypeError):
            rebased.update(formatter=42)

        with self.assertRaises(ValueError):
            rebased.update(something=42, new="42")

        # Check that pickle works on StoredFileInfo.
        pickled_info = pickle.dumps(info)
        unpickled_info = pickle.loads(pickled_info)
        self.assertEqual(unpickled_info, info)

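    # The assertions above amount to two round-trip guarantees, restated
    # here for reference:
    #
    #     StoredFileInfo.from_record(record).to_record() == record
    #     info.rebase(ref2).rebase(ref) == info
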
if __name__ == "__main__":
    unittest.main()