Coverage for tests/test_butler.py: 17%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import os
26import posixpath
27import unittest
28import tempfile
29import shutil
30import pickle
31import string
32import random
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # The AWS/S3 test dependencies are optional.  Mark both modules as
    # unavailable so later guards (e.g. ``boto3 is None``) work; previously
    # ``botocore`` was left undefined here, which would raise NameError if it
    # was ever referenced after a failed import.
    boto3 = None
    botocore = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        return cls
46from lsst.utils import doImport
47from lsst.daf.butler.core.utils import safeMakeDir
48from lsst.daf.butler import Butler, Config, ButlerConfig
49from lsst.daf.butler import StorageClassFactory
50from lsst.daf.butler import DatasetType, DatasetRef
51from lsst.daf.butler import FileTemplateValidationError, ValidationError
52from lsst.daf.butler import FileDataset
53from lsst.daf.butler import CollectionSearch, CollectionType
54from lsst.daf.butler import ButlerURI
55from lsst.daf.butler import script
56from lsst.daf.butler.registry import MissingCollectionError
57from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
58from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
59 unsetAwsEnvCredentials)
61from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
# Absolute path to the directory holding this test file; used to locate the
# test configuration and data files that live alongside it.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
def makeExampleMetrics():
    """Construct a `MetricsExample` with fixed example content for tests."""
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)
class TransactionTestError(Exception):
    """Dedicated exception type for transaction tests.

    Using a purpose-built error (rather than a standard exception) avoids
    misdiagnosing failures that a generic exception class could mask.
    """
class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        """Check that searchPaths overrides are applied and logged."""
        baseConfigPath = os.path.join(TESTDIR, "config", "basic", "butler.yaml")

        # Without search paths the override directory must not be consulted.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logCapture:
            defaultConfig = ButlerConfig(baseConfigPath)
        self.assertNotIn("testConfigs", "\n".join(logCapture.output))

        # With an explicit search path the override directory must be used
        # (and mentioned in the debug log).
        overrideDir = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as logCapture:
            overriddenConfig = ButlerConfig(baseConfigPath, searchPaths=[overrideDir])
        self.assertIn("testConfigs", "\n".join(logCapture.output))

        # The overridden value must differ from the default and match the
        # content of the override configuration.
        key = ("datastore", "records", "table")
        self.assertNotEqual(defaultConfig[key], overriddenConfig[key])
        self.assertEqual(overriddenConfig[key], "override_record")
class ButlerPutGetTests:
    """Helper method for running a suite of put/get tests from different
    butler configurations."""

    # Repository root directory; subclasses set this in their ``setUp``.
    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Build a shared StorageClassFactory from the test configuration;
        # ``cls.configFile`` is provided by the concrete subclass.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        """Assert that each named component retrieved through ``butler``
        matches the corresponding attribute of ``reference``.
        """
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        # Remove the temporary repository root, if one was created.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise the full put/get/prune cycle for one storage class and
        dataset type, returning the Butler for further checks by callers.
        """
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # There will not be a collection yet
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Try to create one that will have a name that is too long
        with self.assertRaises(Exception) as cm:
            self.addDatasetType("DatasetTypeNameTooLong" * 50, dimensions, storageClass, butler.registry)
        self.assertIn("check constraint", str(cm.exception).lower())

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            # "counter" is a derived read component for some storage classes.
            if "counter" in storageClass.readComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        """Check that collections/run can be supplied per-call rather than
        at Butler construction time.
        """
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
367class ButlerTests(ButlerPutGetTests):
368 """Tests for Butler.
369 """
370 useTempRoot = True
372 def setUp(self):
373 """Create a new butler root for each test."""
374 if self.useTempRoot:
375 self.root = tempfile.mkdtemp(dir=TESTDIR)
376 Butler.makeRepo(self.root, config=Config(self.configFile))
377 self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
378 else:
379 self.root = None
380 self.tmpConfigFile = self.configFile
382 def testConstructor(self):
383 """Independent test of constructor.
384 """
385 butler = Butler(self.tmpConfigFile, run="ingest")
386 self.assertIsInstance(butler, Butler)
388 collections = set(butler.registry.queryCollections())
389 self.assertEqual(collections, {"ingest"})
391 butler2 = Butler(butler=butler, collections=["other"])
392 self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
393 self.assertIsNone(butler2.run)
394 self.assertIs(butler.registry, butler2.registry)
395 self.assertIs(butler.datastore, butler2.datastore)
397 def testBasicPutGet(self):
398 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
399 self.runPutGetTest(storageClass, "test_metric")
    def testCompositePutGetConcrete(self):
        """Put/get a composite whose storage class forbids disassembly;
        the dataset must be stored as a single artifact.
        """

        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
    def testCompositePutGetVirtual(self):
        """Put/get a composite whose storage class allows disassembly;
        component URIs should be reported unless the datastore is ephemeral.
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            # Disassembled: no primary URI; one URI per component.
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")
    def testIngest(self):
        """Test ingesting external files, both one-file-per-dataset and
        several datasets sharing a single file.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        # One file per detector, one ref per file.
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both datasets come from the same file.
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)
557 def testPruneCollections(self):
558 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
559 butler = Butler(self.tmpConfigFile, writeable=True)
560 # Load registry data with dimensions to hang datasets off of.
561 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
562 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
563 # Add some RUN-type collections.
564 run1 = "run1"
565 butler.registry.registerRun(run1)
566 run2 = "run2"
567 butler.registry.registerRun(run2)
568 # put some datasets. ref1 and ref2 have the same data ID, and are in
569 # different runs. ref3 has a different data ID.
570 metric = makeExampleMetrics()
571 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
572 datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
573 butler.registry)
574 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
575 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
576 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
577 # Try to delete a RUN collection without purge, or with purge and not
578 # unstore.
579 with self.assertRaises(TypeError):
580 butler.pruneCollection(run1)
581 with self.assertRaises(TypeError):
582 butler.pruneCollection(run2, purge=True)
583 # Add a TAGGED collection and associate ref3 only into it.
584 tag1 = "tag1"
585 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
586 butler.registry.associate(tag1, [ref3])
587 # Add a CHAINED collection that searches run1 and then run2. It
588 # logically contains only ref1, because ref2 is shadowed due to them
589 # having the same data ID and dataset type.
590 chain1 = "chain1"
591 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
592 butler.registry.setCollectionChain(chain1, [run1, run2])
593 # Try to delete RUN collections, which should fail with complete
594 # rollback because they're still referenced by the CHAINED
595 # collection.
596 with self.assertRaises(Exception):
597 butler.pruneCollection(run1, pruge=True, unstore=True)
598 with self.assertRaises(Exception):
599 butler.pruneCollection(run2, pruge=True, unstore=True)
600 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
601 [ref1, ref2, ref3])
602 self.assertTrue(butler.datastore.exists(ref1))
603 self.assertTrue(butler.datastore.exists(ref2))
604 self.assertTrue(butler.datastore.exists(ref3))
605 # Try to delete CHAINED and TAGGED collections with purge; should not
606 # work.
607 with self.assertRaises(TypeError):
608 butler.pruneCollection(tag1, purge=True, unstore=True)
609 with self.assertRaises(TypeError):
610 butler.pruneCollection(chain1, purge=True, unstore=True)
611 # Remove the tagged collection with unstore=False. This should not
612 # affect the datasets.
613 butler.pruneCollection(tag1)
614 with self.assertRaises(MissingCollectionError):
615 butler.registry.getCollectionType(tag1)
616 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
617 [ref1, ref2, ref3])
618 self.assertTrue(butler.datastore.exists(ref1))
619 self.assertTrue(butler.datastore.exists(ref2))
620 self.assertTrue(butler.datastore.exists(ref3))
621 # Add the tagged collection back in, and remove it with unstore=True.
622 # This should remove ref3 only from the datastore.
623 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
624 butler.registry.associate(tag1, [ref3])
625 butler.pruneCollection(tag1, unstore=True)
626 with self.assertRaises(MissingCollectionError):
627 butler.registry.getCollectionType(tag1)
628 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
629 [ref1, ref2, ref3])
630 self.assertTrue(butler.datastore.exists(ref1))
631 self.assertTrue(butler.datastore.exists(ref2))
632 self.assertFalse(butler.datastore.exists(ref3))
633 # Delete the chain with unstore=False. The datasets should not be
634 # affected at all.
635 butler.pruneCollection(chain1)
636 with self.assertRaises(MissingCollectionError):
637 butler.registry.getCollectionType(chain1)
638 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
639 [ref1, ref2, ref3])
640 self.assertTrue(butler.datastore.exists(ref1))
641 self.assertTrue(butler.datastore.exists(ref2))
642 self.assertFalse(butler.datastore.exists(ref3))
643 # Redefine and then delete the chain with unstore=True. Only ref1
644 # should be unstored (ref3 has already been unstored, but otherwise
645 # would be now).
646 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
647 butler.registry.setCollectionChain(chain1, [run1, run2])
648 butler.pruneCollection(chain1, unstore=True)
649 with self.assertRaises(MissingCollectionError):
650 butler.registry.getCollectionType(chain1)
651 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
652 [ref1, ref2, ref3])
653 self.assertFalse(butler.datastore.exists(ref1))
654 self.assertTrue(butler.datastore.exists(ref2))
655 self.assertFalse(butler.datastore.exists(ref3))
656 # Remove run1. This removes ref1 and ref3 from the registry (they're
657 # already gone from the datastore, which is fine).
658 butler.pruneCollection(run1, purge=True, unstore=True)
659 with self.assertRaises(MissingCollectionError):
660 butler.registry.getCollectionType(run1)
661 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
662 [ref2])
663 self.assertTrue(butler.datastore.exists(ref2))
664 # Remove run2. This removes ref2 from the registry and the datastore.
665 butler.pruneCollection(run2, purge=True, unstore=True)
666 with self.assertRaises(MissingCollectionError):
667 butler.registry.getCollectionType(run2)
668 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
669 [])
671 def testPickle(self):
672 """Test pickle support.
673 """
674 butler = Butler(self.tmpConfigFile, run="ingest")
675 butlerOut = pickle.loads(pickle.dumps(butler))
676 self.assertIsInstance(butlerOut, Butler)
677 self.assertEqual(butlerOut._config, butler._config)
678 self.assertEqual(butlerOut.collections, butler.collections)
679 self.assertEqual(butlerOut.run, butler.run)
    def testGetDatasetTypes(self):
        """Test querying registered dataset types (including components)
        and validating the Butler configuration against them.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are not created
        # for components but querying them can return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        # ``validationCanFail`` is defined by the concrete subclass.
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])
    def testTransaction(self):
        """Test that a failure inside ``butler.transaction()`` rolls back
        all registry inserts and datastore writes made within it.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                # Deliberately abort: everything above must be rolled back.
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)
    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        createRegistry = not self.useTempRoot
        # First repo: config written with defaults NOT expanded.
        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile),
                                       createRegistry=createRegistry)
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        # Second repo over the same root (overwrite=True): standalone mode
        # expands all defaults into the written config.
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        # Without overwrite=True, making a repo over an existing one must fail.
        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)
816 def testStringification(self):
817 butler = Butler(self.tmpConfigFile, run="ingest")
818 butlerStr = str(butler)
820 if self.datastoreStr is not None:
821 for testStr in self.datastoreStr:
822 self.assertIn(testStr, butlerStr)
823 if self.registryStr is not None:
824 self.assertIn(self.registryStr, butlerStr)
826 datastoreName = butler.datastore.name
827 if self.datastoreName is not None:
828 for testStr in self.datastoreName:
829 self.assertIn(testStr, datastoreName)
class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location. For POSIXDatastore this test is equivalent
        to `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        """Test that file templates determine the on-disk location of
        datasets and that non-unique templates are caught by validation.
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        # A second put with the same (non-unique) filename must be refused.
        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    # Export of virtual composites is not yet supported; keep the test to
    # document the expected eventual behavior.
    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Populate a repo, export its datasets to a YAML file, import them
        into a fresh repo, and verify every dataset is present there.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets. We used TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, output_run="ingest/run", export_file=f,
                                        directory=exportButler.datastore.root, transfer="symlink")
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Key expected only in a fully-expanded (standalone) config.
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    # Substrings expected in str(butler) and in the datastore name.
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"
class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # No file-system root, so the makeRepo test is skipped.
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # Ingest of external files is not applicable to an in-memory
        # datastore, so disable the inherited test.
        pass
class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler: an in-memory datastore
    chained with two Posix datastores.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # Key path indexes into the second datastore of the chain.
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"
class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        """Create a repo under dir1 but relocate its config to dir2."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Build the repository itself in the first directory.
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Copy the config into a second directory, recording the first
        # directory as the explicit "root", then delete the original.
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        originalConfigPath = os.path.join(self.dir1, "butler.yaml")
        relocated = Config(originalConfigPath)
        relocated["root"] = self.dir1
        relocatedConfigPath = os.path.join(self.dir2, "butler2.yaml")
        relocated.dumpToFile(relocatedConfigPath)
        os.remove(originalConfigPath)
        self.tmpConfigFile = relocatedConfigPath

    def testFileLocations(self):
        """Sanity-check which files ended up in which directory."""
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Make a repo in one temp directory, writing its config file to a
        second directory via the ``outfile`` argument.
        """
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        # ignore_errors=True already tolerates a missing tree, so no
        # os.path.exists() guard is needed before removal.
        shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        """Check the external config records the repo root as a proper URI."""
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        """Run the standard put/get round-trip against the external config."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")
class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Make a repo but pass a bare directory (not a file path) as the
        ``outfile``, so makeRepo chooses the config file name itself.
        """
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()
class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Make a repo but supply the ``outfile`` location as a URI string
        rather than a plain file-system path.
        """
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # The f-prefix was previously missing, so {bucketName}/{root} were never
    # interpolated and the literal placeholders were compared instead.
    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        """Create a mock S3 bucket and make a butler repo at its root."""
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        # The test framework iterates these as lists of expected substrings
        # (see ButlerTests.testStringification), so keep them as lists; a
        # bare string would degrade the check to per-character containment.
        self.datastoreStr = [f"datastore={self.root}"]
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        """Empty and delete the mock bucket and drop any dummy credentials."""
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existance of the files
        in the requested location. For S3Datastore this test is equivalent to
        `lsst.daf.butler.core.s3utils.s3checkFileExists` call.
        """
        uri = ButlerURI(root)
        uri.updateFile(relpath)
        return s3CheckFileExists(uri)[0]

    # Export via symlink transfer is not possible for S3-backed storage.
    @unittest.expectedFailure
    def testImportExport(self):
        super().testImportExport()
# Allow the test suite to be run directly: python test_butler.py
if __name__ == "__main__":
    unittest.main()