Coverage for tests/test_butler.py: 17%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import os
26import posixpath
27import unittest
28import tempfile
29import shutil
30import pickle
31import string
32import random

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported.
        """
        return cls
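
# With this fallback in place, the @mock_s3 class decorator applied to
# S3DatastoreButlerTestCase below is harmless when moto is missing; that
# test case is additionally skipped outright via
# @unittest.skipIf(not boto3, ...).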

import astropy.time
from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
                                          unsetAwsEnvCredentials)

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )
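
# The three positional arguments above appear to populate the ``summary``,
# ``output`` and ``data`` attributes that the tests below compare component
# by component (see assertGetComponents and the slice-parameter checks).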


class TransactionTestError(Exception):
    """Specific error for testing transactions, to avoid the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Constructing the Butler should have registered both collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Try to create one that will have a name that is too long
        with self.assertRaises(Exception) as cm:
            self.addDatasetType("DatasetTypeNameTooLong" * 50, dimensions, storageClass, butler.registry)
        self.assertIn("check constraint", str(cm.exception).lower())

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))
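
                # In short, this loop exercises three escalating removal
                # modes:
                #   pruneDatasets([ref])              drop from TAGGED only
                #   pruneDatasets([ref], unstore=True)
                #                                     also delete the file
                #                                     from the datastore
                #   pruneDatasets([ref], purge=True, unstore=True)
                #                                     also purge the registry
                #                                     entry itself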

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.readComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
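
    # The pattern above (a writeable Butler constructed with no default run
    # or collections) requires every put/get/datasetExists call to pass run=
    # or collections= explicitly; omitting them raises TypeError, as asserted.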


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")
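
    # Note the contrast with testCompositePutGetConcrete above: a
    # disassembling datastore returns no primary URI and one URI per
    # component, while the no-disassembly and in-memory cases return a
    # single primary URI with no component URIs.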

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)
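
    # The single-file ingest above works because MultiDetectorFormatter can
    # evidently read each detector's metrics out of one shared YAML file,
    # which is why the two refs are asserted to resolve to the same URI.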

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])
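
    # To summarize the pruneCollection semantics exercised above: TAGGED and
    # CHAINED collections reject purge=True; unstore=True additionally
    # removes whatever the collection contains (or resolves to) from the
    # datastore; RUN collections require both purge=True and unstore=True
    # and delete their datasets from the registry as well.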

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying with components=True can
        # still return them.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)
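
    # Everything inside butler.transaction() above, including the dimension
    # inserts, the registry entry, and the datastore write, is rolled back
    # together when the exception propagates; the four follow-up checks
    # verify each layer separately.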

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        createRegistry = not self.useTempRoot
        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile),
                                       createRegistry=createRegistry)
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" is missing the keys that "full" should have
        # picked up from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.  For PosixDatastore this test is
        equivalent to an `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions.  This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)
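
    # The "metric3" template above evidently omits the visit dimension, so
    # the put for visit 425 would land on the same filename as the one for
    # visit 423; validateTemplates flags the template as bad, and the second
    # put raises FileExistsError.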

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets.  We use TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-export, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand.  Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, output_run="ingest/run", export_file=f,
                                        directory=exportButler.datastore.root, transfer="symlink")
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
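
    # A rough public-API equivalent of the script.butlerImport call above,
    # assuming Butler.import_ (used with filename= in testPruneCollections)
    # also accepts directory and transfer arguments:
    #
    #     importButler = Butler(importDir, run="ingest/run")
    #     importButler.import_(filename=exportFile,
    #                          directory=str(exportButler.datastore.root),
    #                          transfer="symlink")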


class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToFile(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works
    when outfile is a directory."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works
    when outfile is a URI."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore plus
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests.  The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root name for
        the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp, as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable; pass.
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.  For S3Datastore this test is
        equivalent to a `lsst.daf.butler.core.s3utils.s3CheckFileExists` call.
        """
        uri = ButlerURI(root)
        uri.updateFile(relpath)
        return s3CheckFileExists(uri)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        super().testImportExport()


if __name__ == "__main__":
    unittest.main()