Coverage for tests/test_butler.py : 20%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import os
26import posixpath
27import unittest
28import tempfile
29import shutil
30import pickle
31import string
32import random
# boto3/botocore/moto are optional test-only dependencies used by the S3
# datastore tests; when they are absent, ``boto3`` is set to None so those
# tests can be skipped, and ``mock_s3`` degrades to a pass-through decorator.
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        # Return the decorated class unchanged so ``@mock_s3`` stays legal.
        return cls
46from lsst.utils import doImport
47from lsst.daf.butler.core.safeFileIo import safeMakeDir
48from lsst.daf.butler import Butler, Config, ButlerConfig
49from lsst.daf.butler import StorageClassFactory
50from lsst.daf.butler import DatasetType, DatasetRef
51from lsst.daf.butler import FileTemplateValidationError, ValidationError
52from lsst.daf.butler import FileDataset
53from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
54from lsst.daf.butler.core.location import ButlerURI
55from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
56 unsetAwsEnvCredentials)
58from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
# Absolute path of the directory holding this test module; used to locate
# the config/ and data/ fixtures that live next to it.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
def makeExampleMetrics():
    """Return a fresh `MetricsExample` with fixed contents for put/get tests."""
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)
class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        """Check that a searchPaths override changes the resulting config."""
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")

        # Without an override directory, "testConfigs" never enters the
        # logged search path.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logs:
            defaultConfig = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(logs.output))

        # With the override directory supplied, the directory is searched
        # and the overridden value wins.
        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logs:
            overriddenConfig = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(logs.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(defaultConfig[key], overriddenConfig[key])
        self.assertEqual(overriddenConfig[key], "override_record")
98class ButlerPutGetTests:
99 """Helper method for running a suite of put/get tests from different
100 butler configurations."""
102 root = None
104 @staticmethod
105 def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
106 """Create a DatasetType and register it
107 """
108 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
109 registry.registerDatasetType(datasetType)
110 return datasetType
112 @classmethod
113 def setUpClass(cls):
114 cls.storageClassFactory = StorageClassFactory()
115 cls.storageClassFactory.addFromConfig(cls.configFile)
117 def assertGetComponents(self, butler, datasetRef, components, reference):
118 datasetTypeName = datasetRef.datasetType.name
119 dataId = datasetRef.dataId
120 for component in components:
121 compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
122 result = butler.get(compTypeName, dataId)
123 self.assertEqual(result, getattr(reference, component))
125 def tearDown(self):
126 if self.root is not None and os.path.exists(self.root):
127 shutil.rmtree(self.root, ignore_errors=True)
    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise a full put/get/remove cycle for one storage class and
        dataset type, returning the populated Butler for further use.

        The sequence of puts and removes below is strictly order-dependent:
        each stage verifies the registry/datastore state left by the
        previous one.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")

        # There will not be a collection yet
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, set())

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from collection only; after that we shouldn't be able
                # to find it unless we use the dataset_id.
                butler.remove(*args, delete=False)
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(butler.collection, [ref])
                butler.remove(*args)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Put again, then remove completely (this generates a new
                # dataset record in registry, with a new ID - the old one
                # still exists but it is not in any collection so we don't
                # care).
                ref = butler.put(metric, *args)
                butler.remove(*args, remember=False)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            butler.remove(compNameS, dataId, remember=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, {"ingest", })

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.remove(ref.datasetType.name, dataId)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler
291 # Construct a butler with no run or collection, but make it writeable.
292 butler = Butler(self.tmpConfigFile, writeable=True)
293 # Create and register a DatasetType
294 dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
295 datasetType = self.addDatasetType("example", dimensions,
296 self.storageClassFactory.getStorageClass("StructuredData"),
297 butler.registry)
298 # Add needed Dimensions
299 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
300 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
301 "name": "d-r",
302 "abstract_filter": "R"})
303 butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
304 "name": "fourtwentythree", "physical_filter": "d-r"})
305 dataId = {"instrument": "DummyCamComp", "visit": 423}
306 # Create dataset.
307 metric = makeExampleMetrics()
308 # Register a new run and put dataset.
309 run = "deferred"
310 butler.registry.registerRun(run)
311 ref = butler.put(metric, datasetType, dataId, run=run)
312 # Putting with no run should fail with TypeError.
313 with self.assertRaises(TypeError):
314 butler.put(metric, datasetType, dataId)
315 # Dataset should exist.
316 self.assertTrue(butler.datasetExists(datasetType, dataId, collection=run))
317 # We should be able to get the dataset back, but with and without
318 # a deferred dataset handle.
319 self.assertEqual(metric, butler.get(datasetType, dataId, collection=run))
320 self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collection=run).get())
321 # Trying to find the dataset without any collection is a TypeError.
322 with self.assertRaises(TypeError):
323 butler.datasetExists(datasetType, dataId)
324 with self.assertRaises(TypeError):
325 butler.get(datasetType, dataId)
326 with self.assertRaises(TypeError):
327 butler.remove(datasetType, dataId)
328 # Associate the dataset with a different collection.
329 butler.registry.associate("tagged", [ref])
330 # Deleting the dataset from the new collection should make it findable
331 # in the original collection but without a Datastore entry.
332 butler.remove(datasetType, dataId, collection="tagged")
333 self.assertFalse(butler.datasetExists(datasetType, dataId, collection=run))
class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    # Subclasses set this False when the datastore has no file-system root
    # (e.g. in-memory), so setUp uses the config file unchanged.
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = butler.registry.getAllCollections()
        self.assertEqual(collections, set())

        # A butler built from another butler shares registry and datastore.
        butler2 = Butler(butler=butler, collection="other")
        self.assertEqual(butler2.collection, "other")
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        """Put/get with a storage class that has no components."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        """Put/get with a composite stored as a single concrete dataset."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetVirtual(self):
        """Put/get with a composite stored as disassembled components."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runPutGetTest(storageClass, "test_metric_comp")

    def testIngest(self):
        """Ingest externally-created files, per-detector and multi-detector."""
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both refs point at the same single file
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second
        butler.remove(datasetTypeName, dataId1)
        with self.assertRaises(LookupError):
            butler.datasetExists(datasetTypeName, dataId1)
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collection, butler.collection)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        """Register dataset types and run configuration validation."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are created
        # for each component. Need entries for each component in the test
        # configuration otherwise validation won't work. The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = butler.registry.getAllDatasetTypes()
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        """Verify that a failed transaction rolls back registry and datastore."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Create and register a DatasetType
                datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")

        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(datasetTypeName)
        with self.assertRaises(LookupError):
            butler.registry.expandDataId(dataId)
        # Should raise KeyError for missing DatasetType
        with self.assertRaises(KeyError):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.find(butler.collection, datasetType, dataId))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = butler1.registry.getAllCollections()
        self.assertEqual(collections1, set())
        self.assertEqual(butler2.registry.getAllCollections(), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

    def testStringification(self):
        """Check str(butler) mentions the expected datastore and registry."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)
class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location. For POSIXDatastore this test is equivalent
        to `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        """Verify that file templates place datasets at the expected paths."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        # A non-unique template means the second put collides on disk.
        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        """Round-trip datasets through export to YAML and import into a
        fresh repository."""
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets. We used TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            # Import must happen while exportFile still exists, i.e. inside
            # the exportDir context.
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    # Configuration and expected-value attributes consumed by the inherited
    # ButlerTests suite.
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"
class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # No file-system root, so the makeRepo test is skipped and setUp does
    # not create a temporary directory.
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # File ingest is not meaningful for an in-memory datastore.
        pass
class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """PosixDatastore specialization"""
    # Chained configuration: an in-memory datastore in front of two
    # Posix datastores.
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"
class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        """Create a repo in dir1 but serve its config from dir2."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToFile(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        """Sanity-check the split layout created by setUp."""
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Write the generated config into a second directory, well away
        # from the repository root itself.
        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        config = Config(self.tmpConfigFile)
        actualRoot = ButlerURI(config["root"])
        expectedRoot = ButlerURI(self.root)
        self.assertEqual(actualRoot.geturl(), expectedRoot.geturl())
        self.assertNotIn(":", actualRoot.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")
class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Pass a directory (not a file name) as outfile; makeRepo is
        # expected to create butler.yaml inside it.
        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()
class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Express the outfile location as a URI rather than a plain path.
        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # BUG FIX: this was a plain string missing the ``f`` prefix, so the
    # literal text "{bucketName}" / "{root}" was stored instead of the
    # interpolated values (compare datastoreStr above, which is an f-string
    # over the same class-scope names).
    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    # Was ``f":memory:"`` — an f-string with no placeholders; plain literal.
    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        # Trailing slash so the value can be used directly as a key prefix.
        return rndstr + "/"

    def setUp(self):
        # Read the bucket name out of the datastore root URI in the config.
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        # NOTE(review): the class attribute datastoreStr is a list but a
        # plain string is stored here — confirm downstream checks accept
        # both (``in`` works on either).
        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        # Empty the bucket, tolerating keys that have already vanished,
        # then remove the bucket itself.
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location. For S3Datastore this test is equivalent to
        `lsst.daf.butler.core.s3utils.s3checkFileExists` call.
        """
        uri = ButlerURI(root)
        client = boto3.client("s3")
        return s3CheckFileExists(uri, client=client)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        # Import/export is not yet supported for the S3 datastore; run the
        # inherited test but expect it to fail.
        super().testImportExport()
# Standard unittest entry point so the file can be run directly.
if __name__ == "__main__":
    unittest.main()