# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import os
import posixpath
import unittest
import tempfile
import shutil
import pickle
import string
import random

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported.
        """
        return cls
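
# Note: the fallback ``mock_s3`` above must exist even when moto is absent,
# because it is applied as a class decorator at import time (see
# ``S3DatastoreButlerTestCase`` below); without it, merely importing this
# module would fail.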

import astropy.time
from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core.s3utils import (setAwsEnvCredentials,
                                          unsetAwsEnvCredentials)
from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )
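

# MetricsExample(summary, output, data): the three positional arguments
# above map to the ``summary``, ``output`` and ``data`` components that the
# put/get tests below read back individually.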


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests of ButlerConfig behavior that are not covered by the
    other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests from different
    butler configurations."""

    root = None
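
    # Subclasses are expected to define ``configFile`` (consumed by
    # ``setUpClass`` below) and to set ``self.tmpConfigFile`` before the
    # put/get helpers run.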

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Construction of the Butler should have registered both collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Try to create one that will have a name that is too long
        with self.assertRaises(Exception) as cm:
            self.addDatasetType("DatasetTypeNameTooLong" * 50, dimensions, storageClass, butler.registry)
        self.assertIn("check constraint", str(cm.exception).lower())

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.readComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, no entries are created
        # for its components, but querying with components=True can still
        # return the component dataset types.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        createRegistry = not self.useTempRoot
        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile),
                                       createRegistry=createRegistry)
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" lacks a few keys it would only get by
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()
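
    # Using ButlerURI keeps this existence check scheme-agnostic, so the
    # same helper works for datastores whose root is not a local POSIX path
    # (e.g. the S3 specialization below).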

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Export datasets to a temp directory and import them back into a
        new temp-directory repo.  This test does not assume a POSIX
        datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets.  We use TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand.  Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, output_run="ingest/run", export_file=f,
                                        directory=exportDir, transfer="auto", skip_dimensions=None)
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))


class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler."""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export with several file transfer modes."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = ButlerURI(exportButler.datastore.root, forceDirectory=True)

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with tempfile.TemporaryDirectory(dir=TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                    for path in pathsInStore:
                        self.assertTrue(self.checkFileExists(exportDir, path),
                                        f"Check that mode {transfer} exported files")


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler."""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
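        # File ingest is not supported by the in-memory datastore, so this
        # override disables the ingest test inherited from ButlerTests.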
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler."""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1/,", "/PosixDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in
    another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works
    when the outfile is a directory."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works
    when the outfile is given as a URI."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)
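

# A minimal sketch of the moto pattern used below (assuming moto is
# installed): the ``mock_s3`` decorator patches boto3 so that the bucket
# operations in ``setUp``/``tearDown`` never touch real AWS.
#
#     @mock_s3
#     class Example(unittest.TestCase):
#         def test_bucket(self):
#             s3 = boto3.resource("s3")
#             s3.create_bucket(Bucket="demo")  # intercepted by moto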
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore
    plus a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests.  The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary bucket repo.

        This plays the role tempfile.mkdtemp plays for local repos: it is
        what self.root becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = [f"datastore={self.root}"]
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")
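
    # Note: ``tmpConfigFile`` is an s3:// URI here; the Butler constructor
    # accepts it just like a local path in the inherited tests above.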

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable - pass.
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()


if __name__ == "__main__":
    unittest.main()