Coverage for tests/test_butler.py : 20%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import os
26import posixpath
27import unittest
28import tempfile
29import shutil
30import pickle
31import string
32import random
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # moto (and possibly boto3/botocore) is not installed.  boto3 is set to
    # None so that code elsewhere can detect the missing dependency --
    # presumably the S3 test cases skip themselves when boto3 is None
    # (TODO confirm against the S3 tests, which are outside this view).
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        return cls
46from lsst.utils import doImport
47from lsst.daf.butler.core.safeFileIo import safeMakeDir
48from lsst.daf.butler import Butler, Config, ButlerConfig
49from lsst.daf.butler import StorageClassFactory
50from lsst.daf.butler import DatasetType, DatasetRef
51from lsst.daf.butler import FileTemplateValidationError, ValidationError
52from lsst.daf.butler import FileDataset
53from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
54from lsst.daf.butler.core.location import ButlerURI
55from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
56 unsetAwsEnvCredentials)
58from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
# Absolute path of the directory holding this test module; used to locate
# the test config files and example data shipped alongside the tests.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
def makeExampleMetrics():
    """Return a `MetricsExample` populated with fixed example values.

    The same instance contents are used by every put/get test so that
    round-tripped datasets can be compared by equality.
    """
    mapping = {"AM1": 5.2, "AM2": 30.6}
    nested = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    sequence = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(mapping, nested, sequence)
class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
    # NOTE: the docstring alone is a valid class body; the redundant
    # ``pass`` statement from the original has been removed.
class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        """Check that ``searchPaths`` overrides values from the base config
        and that use of the override directory is visible in the debug log.
        """
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        # Without search paths the override directory must not be consulted.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        # With a search path the override directory should appear in the log.
        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        # The override config is expected to change this specific key.
        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")
class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests from different
    butler configurations.

    Subclasses must provide ``configFile`` (path to a butler config) and
    ``tmpConfigFile`` (config of the repo under test), typically via
    ``setUp`` in a concrete test case.
    """

    # Repository root; set by subclasses' setUp, removed in tearDown.
    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it in the given registry.

        Returns the newly created `DatasetType`.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Storage classes are shared by all tests in the class.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        """Assert that each named component retrieved through the butler
        equals the corresponding attribute of ``reference``.
        """
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        # Remove the temporary repo root if one was created.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise put/get/remove round trips for one storage class and
        dataset type name; returns the butler so callers can inspect it.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")

        # There will not be a collection yet
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, set())

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from collection only; after that we shouldn't be able
                # to find it unless we use the dataset_id.
                butler.remove(*args, delete=False)
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(butler.collection, [ref])
                butler.remove(*args)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Put again, then remove completely (this generates a new
                # dataset record in registry, with a new ID - the old one
                # still exists but it is not in any collection so we don't
                # care).
                ref = butler.put(metric, *args)
                butler.remove(*args, remember=False)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            butler.remove(compNameS, dataId, remember=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, {"ingest", })

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.remove(ref.datasetType.name, dataId)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        """Test put/get with explicit ``run``/``collection`` arguments on a
        butler constructed without a default run or collection.

        NOTE(review): in the source dump these statements appeared directly
        after ``return butler`` with no enclosing ``def`` line (orphaned,
        unreachable code); they are reconstructed here as their own test
        method so they execute again.
        """
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collection=run))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collection=run))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collection=run).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.remove(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection but without a Datastore entry.
        butler.remove(datasetType, dataId, collection="tagged")
        self.assertFalse(butler.datasetExists(datasetType, dataId, collection=run))
class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    # Whether setUp should create a fresh temporary repo root per test.
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            # No file-system root (e.g. in-memory datastore); use the
            # class-level config file directly.
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        # Nothing has been put, so no collections exist yet.
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, set())

        # A butler constructed from another butler shares its registry and
        # datastore but can use a different collection and no run.
        butler2 = Butler(butler=butler, collection="other")
        self.assertEqual(butler2.collection, "other")
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        """Put/get round trip with a non-composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        """Put/get round trip with a concrete composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetVirtual(self):
        """Put/get round trip with a virtual composite storage class."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runPutGetTest(storageClass, "test_metric_comp")

    def testIngest(self):
        """Test ingesting external files, both one file per dataset and
        multiple datasets in a single file."""
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs; both datasets come from the same file so the URIs
        # must match.
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second
        butler.remove(datasetTypeName, dataId1)
        with self.assertRaises(LookupError):
            butler.datasetExists(datasetTypeName, dataId1)
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collection, butler.collection)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        """Test registry dataset-type listing and configuration validation."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are created
        # for each component. Need entries for each component in the test
        # configuration otherwise validation won't work. The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = butler.registry.getAllDatasetTypes()
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        # validationCanFail is defined by the concrete subclass.
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        """Test that a failed transaction rolls back registry and datastore
        changes."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Create and register a DatasetType
                datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                # Force the transaction to roll back.
                raise TransactionTestError("This should roll back the entire transaction")

        # After rollback nothing created inside the transaction may remain.
        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(datasetTypeName)
        with self.assertRaises(LookupError):
            butler.registry.expandDataId(dataId)
        # Should raise KeyError for missing DatasetType
        with self.assertRaises(KeyError):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.find(butler.collection, datasetType, dataId))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = butler1.registry.getAllCollections()
        self.assertEqual(collections1, set())
        self.assertEqual(butler2.registry.getAllCollections(), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

    def testStringification(self):
        """Test that str(butler) mentions the expected datastore and
        registry; expected substrings come from subclass attributes."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)
class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For POSIXDatastore this test is equivalent
        to `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        """Check that file templates place datasets at the expected paths
        and that a non-unique template is rejected by validation."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        """Round-trip datasets through export and import into a fresh repo."""
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets. We used TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    # Butler configuration used to create the test repository.
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Key present only in a fully expanded config (used by testMakeRepo).
    fullConfigKey = ".datastore.formatters"
    # This datastore's template validation can be made to fail.
    validationCanFail = True
    # Substrings expected in str(butler) / datastore name (see
    # testStringification).
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"
class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # No file-system root, so the makeRepo test is disabled (None key).
    fullConfigKey = None
    # No temporary repo root is needed for an in-memory datastore.
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # File ingest is not applicable for an in-memory datastore, so the
        # inherited test is overridden with a no-op to disable it.
        pass
class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """PosixDatastore specialization"""
    # Chained configuration: an in-memory datastore plus POSIX datastores.
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # Index .1 selects the second datastore in the chain.
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"
class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        """Create a repo in one directory and a relocated config, with an
        explicit "root" key, in another."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToFile(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        """Sanity-check where the config and registry files ended up."""
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Ask makeRepo to write its configuration outside the repository
        # root, under a non-default file name.
        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        # The base class only knows about self.root; remove the auxiliary
        # config directory ourselves before delegating.
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        config = Config(self.tmpConfigFile)
        rootFromConfig = ButlerURI(config["root"])
        expectedRoot = ButlerURI(self.root)
        self.assertEqual(rootFromConfig.geturl(), expectedRoot.geturl())
        self.assertNotIn(":", rootFromConfig.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        sc = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(sc, "test_metric")
class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Here the outfile is a directory rather than a file name.
        self.tmpConfigFile = self.root2
        butlerConfig = Config(self.configFile)
        Butler.makeRepo(self.root, config=butlerConfig,
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # The Config constructor cannot infer the file type from a bare
        # directory, so point at the butler.yaml inside it before running
        # the inherited checks.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()
class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Express the outfile location as a URI instead of a plain path.
        outPath = os.path.join(self.root2, "something.yaml")
        self.tmpConfigFile = ButlerURI(outPath).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # Fixed: this was a plain string missing the "f" prefix, so the
    # {bucketName} and {root} placeholders were emitted literally instead
    # of being interpolated (datastoreStr above shows the intended style).
    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    # Removed a spurious f-prefix: the literal has no placeholders.
    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 (plus a trailing "/") to serve
        as a root name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        # Read the bucket name from the test config rather than relying on
        # the class-level default.
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable - pass.
                pass
            else:
                raise

        # Reuse the handle instead of fetching the Bucket a second time.
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For S3Datastore this test is equivalent to
        `lsst.daf.butler.core.s3utils.s3checkFileExists` call.
        """
        # NOTE(review): relpath is accepted but never used here — callers
        # appear to pass the full URI as `root`; confirm before relying on
        # the documented (root, relpath) semantics.
        uri = ButlerURI(root)
        client = boto3.client("s3")
        return s3CheckFileExists(uri, client=client)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        super().testImportExport()
if __name__ == "__main__":
    # Run the whole test module when executed as a script.
    # (Coverage-report residue that had been fused into this line was removed.)
    unittest.main()