Coverage for tests/test_butler.py : 17%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import os
26import posixpath
27import unittest
28import tempfile
29import shutil
30import pickle
31import string
32import random
33import numpy as np
# moto provides an S3 mock for tests. Import it if available; otherwise fall
# back gracefully so the non-S3 tests in this module can still run.
try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    # Sentinel: the rest of the module can test ``boto3 is None`` to detect
    # that S3 support is unavailable.
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.

        Returns the decorated class unchanged.
        """
        return cls
47from lsst.utils import doImport
48from lsst.daf.butler.core.utils import safeMakeDir
49from lsst.daf.butler import Butler, Config, ButlerConfig
50from lsst.daf.butler import StorageClassFactory
51from lsst.daf.butler import DatasetType, DatasetRef
52from lsst.daf.butler import FileTemplateValidationError, ValidationError
53from lsst.daf.butler import FileDataset
54from lsst.daf.butler import CollectionSearch, CollectionType
55from lsst.daf.butler import ButlerURI
56from lsst.daf.butler import script
57from lsst.daf.butler.registry import MissingCollectionError
58from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
59from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
60 unsetAwsEnvCredentials)
62from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
# Absolute path of the directory containing this test module; used to locate
# test configuration and data files.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
def makeExampleMetrics():
    """Build a `MetricsExample` populated with fixed example values.

    Returns
    -------
    metrics : `MetricsExample`
        Example metrics object used as the payload in put/get tests.
    """
    summary = {"AM1": 5.2, "AM2": 30.6}
    output = {"a": [1, 2, 3],
              "b": {"blue": 5, "red": "green"}}
    data = [563, 234, 456.7, 752, 8, 9, 27]
    return MetricsExample(summary, output, data)
class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
    # The redundant ``pass`` after the docstring was removed: a class body
    # containing a docstring needs no additional statement.
class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        """Check that an extra search path overrides base config values."""
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")

        # Without the override directory the test configs must not be used.
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as baseLog:
            baseConfig = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(baseLog.output))

        # With the override directory on the search path the test configs
        # should be picked up, and the log should say so.
        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as overrideLog:
            overrideConfig = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(overrideLog.output))

        # The overridden key must differ between the two configurations.
        key = ("datastore", "records", "table")
        self.assertNotEqual(baseConfig[key], overrideConfig[key])
        self.assertEqual(overrideConfig[key], "override_record")
class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests from different
    butler configurations.

    Subclasses are expected to provide ``configFile`` (class attribute) and
    ``tmpConfigFile`` (instance attribute) — TODO confirm against the
    concrete test classes.
    """

    # Filesystem root of the test repository; set by subclasses, removed in
    # tearDown() when present.
    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType with the given name, dimensions and storage
        class, register it with the registry, and return it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        # Shared storage-class factory seeded from the test configuration.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        """Assert that each named component retrieved through the butler
        equals the corresponding attribute of ``reference``.
        """
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        # Remove the per-test repository root, if one was created.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        """Exercise put/get/prune round trips for one storage class and
        dataset type name; returns the butler for further checks.
        """
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Constructing the butler should have registered exactly the run and
        # tagged collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Try to create one that will have a name that is too long; the
        # database schema is expected to reject it via a check constraint.
        with self.assertRaises(Exception) as cm:
            self.addDatasetType("DatasetTypeNameTooLong" * 50, dimensions, storageClass, butler.registry)
        self.assertIn("check constraint", str(cm.exception).lower())

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            # ref.components will only be populated in certain cases
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            self.assertTrue(compRef.hasParentId)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        """Check per-call run/collection arguments on a butler constructed
        with neither a default run nor default collections.
        """
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, but with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original run collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
360class ButlerTests(ButlerPutGetTests):
361 """Tests for Butler.
362 """
363 useTempRoot = True
365 def setUp(self):
366 """Create a new butler root for each test."""
367 if self.useTempRoot:
368 self.root = tempfile.mkdtemp(dir=TESTDIR)
369 Butler.makeRepo(self.root, config=Config(self.configFile))
370 self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
371 else:
372 self.root = None
373 self.tmpConfigFile = self.configFile
375 def testConstructor(self):
376 """Independent test of constructor.
377 """
378 butler = Butler(self.tmpConfigFile, run="ingest")
379 self.assertIsInstance(butler, Butler)
381 collections = set(butler.registry.queryCollections())
382 self.assertEqual(collections, {"ingest"})
384 butler2 = Butler(butler=butler, collections=["other"])
385 self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
386 self.assertIsNone(butler2.run)
387 self.assertIs(butler.registry, butler2.registry)
388 self.assertIs(butler.datastore, butler2.datastore)
390 def testBasicPutGet(self):
391 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
392 self.runPutGetTest(storageClass, "test_metric")
    def testCompositePutGetConcrete(self):
        """Put/get of a composite storage class that the datastore stores as
        a single concrete file (no disassembly).
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled: one dataset, one primary URI with no
        # component URIs and no fragment.
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset: a URI for a dataset that has not been stored is
        # marked with a "predicted" fragment.
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
    def testCompositePutGetVirtual(self):
        """Put/get of a composite storage class that the datastore may
        disassemble into per-component files.
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled (except for in-memory datastores).
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            # Disassembled: no primary URI, one URI per storage-class
            # component.
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset: URIs for a not-yet-stored dataset carry a
        # "predicted" fragment.
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")
    def testIngest(self):
        """Ingest external files, both one file per dataset and multiple
        datasets sharing a single file.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        # One FileDataset per detector, each backed by its own YAML file.
        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs: distinct files, so distinct URIs.
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            # NOTE(review): detector_name is unused in this loop.
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both datasets come from the same file.
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)
549 def testPruneCollections(self):
550 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
551 butler = Butler(self.tmpConfigFile, writeable=True)
552 # Load registry data with dimensions to hang datasets off of.
553 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
554 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
555 # Add some RUN-type collections.
556 run1 = "run1"
557 butler.registry.registerRun(run1)
558 run2 = "run2"
559 butler.registry.registerRun(run2)
560 # put some datasets. ref1 and ref2 have the same data ID, and are in
561 # different runs. ref3 has a different data ID.
562 metric = makeExampleMetrics()
563 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
564 datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
565 butler.registry)
566 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
567 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
568 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
569 # Try to delete a RUN collection without purge, or with purge and not
570 # unstore.
571 with self.assertRaises(TypeError):
572 butler.pruneCollection(run1)
573 with self.assertRaises(TypeError):
574 butler.pruneCollection(run2, purge=True)
575 # Add a TAGGED collection and associate ref3 only into it.
576 tag1 = "tag1"
577 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
578 butler.registry.associate(tag1, [ref3])
579 # Add a CHAINED collection that searches run1 and then run2. It
580 # logically contains only ref1, because ref2 is shadowed due to them
581 # having the same data ID and dataset type.
582 chain1 = "chain1"
583 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
584 butler.registry.setCollectionChain(chain1, [run1, run2])
585 # Try to delete RUN collections, which should fail with complete
586 # rollback because they're still referenced by the CHAINED
587 # collection.
588 with self.assertRaises(Exception):
589 butler.pruneCollection(run1, pruge=True, unstore=True)
590 with self.assertRaises(Exception):
591 butler.pruneCollection(run2, pruge=True, unstore=True)
592 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
593 [ref1, ref2, ref3])
594 self.assertTrue(butler.datastore.exists(ref1))
595 self.assertTrue(butler.datastore.exists(ref2))
596 self.assertTrue(butler.datastore.exists(ref3))
597 # Try to delete CHAINED and TAGGED collections with purge; should not
598 # work.
599 with self.assertRaises(TypeError):
600 butler.pruneCollection(tag1, purge=True, unstore=True)
601 with self.assertRaises(TypeError):
602 butler.pruneCollection(chain1, purge=True, unstore=True)
603 # Remove the tagged collection with unstore=False. This should not
604 # affect the datasets.
605 butler.pruneCollection(tag1)
606 with self.assertRaises(MissingCollectionError):
607 butler.registry.getCollectionType(tag1)
608 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
609 [ref1, ref2, ref3])
610 self.assertTrue(butler.datastore.exists(ref1))
611 self.assertTrue(butler.datastore.exists(ref2))
612 self.assertTrue(butler.datastore.exists(ref3))
613 # Add the tagged collection back in, and remove it with unstore=True.
614 # This should remove ref3 only from the datastore.
615 butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
616 butler.registry.associate(tag1, [ref3])
617 butler.pruneCollection(tag1, unstore=True)
618 with self.assertRaises(MissingCollectionError):
619 butler.registry.getCollectionType(tag1)
620 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
621 [ref1, ref2, ref3])
622 self.assertTrue(butler.datastore.exists(ref1))
623 self.assertTrue(butler.datastore.exists(ref2))
624 self.assertFalse(butler.datastore.exists(ref3))
625 # Delete the chain with unstore=False. The datasets should not be
626 # affected at all.
627 butler.pruneCollection(chain1)
628 with self.assertRaises(MissingCollectionError):
629 butler.registry.getCollectionType(chain1)
630 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
631 [ref1, ref2, ref3])
632 self.assertTrue(butler.datastore.exists(ref1))
633 self.assertTrue(butler.datastore.exists(ref2))
634 self.assertFalse(butler.datastore.exists(ref3))
635 # Redefine and then delete the chain with unstore=True. Only ref1
636 # should be unstored (ref3 has already been unstored, but otherwise
637 # would be now).
638 butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
639 butler.registry.setCollectionChain(chain1, [run1, run2])
640 butler.pruneCollection(chain1, unstore=True)
641 with self.assertRaises(MissingCollectionError):
642 butler.registry.getCollectionType(chain1)
643 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
644 [ref1, ref2, ref3])
645 self.assertFalse(butler.datastore.exists(ref1))
646 self.assertTrue(butler.datastore.exists(ref2))
647 self.assertFalse(butler.datastore.exists(ref3))
648 # Remove run1. This removes ref1 and ref3 from the registry (they're
649 # already gone from the datastore, which is fine).
650 butler.pruneCollection(run1, purge=True, unstore=True)
651 with self.assertRaises(MissingCollectionError):
652 butler.registry.getCollectionType(run1)
653 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
654 [ref2])
655 self.assertTrue(butler.datastore.exists(ref2))
656 # Remove run2. This removes ref2 from the registry and the datastore.
657 butler.pruneCollection(run2, purge=True, unstore=True)
658 with self.assertRaises(MissingCollectionError):
659 butler.registry.getCollectionType(run2)
660 self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
661 [])
663 def testPickle(self):
664 """Test pickle support.
665 """
666 butler = Butler(self.tmpConfigFile, run="ingest")
667 butlerOut = pickle.loads(pickle.dumps(butler))
668 self.assertIsInstance(butlerOut, Butler)
669 self.assertEqual(butlerOut._config, butler._config)
670 self.assertEqual(butlerOut.collections, butler.collections)
671 self.assertEqual(butlerOut.run, butler.run)
    def testGetDatasetTypes(self):
        """Register several dataset types and validate the butler
        configuration against them.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are created
        # for each component.  Need entries for each component in the test
        # configuration otherwise validation won't work.  The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        # Registry should report the parent types plus all component types.
        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        # self.validationCanFail is expected to be set by the concrete
        # subclass -- TODO confirm.
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])
    def testTransaction(self):
        """Check that raising inside a butler transaction rolls back all
        registry and datastore changes made within it.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                # Deliberately abort so everything above is rolled back.
                raise TransactionTestError("This should roll back the entire transaction")
        # After rollback none of the inserted dimension data should remain.
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)
763 def testMakeRepo(self):
764 """Test that we can write butler configuration to a new repository via
765 the Butler.makeRepo interface and then instantiate a butler from the
766 repo root.
767 """
768 # Do not run the test if we know this datastore configuration does
769 # not support a file system root
770 if self.fullConfigKey is None:
771 return
773 # Remove the file created in setUp
774 os.unlink(self.tmpConfigFile)
776 createRegistry = not self.useTempRoot
777 butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile),
778 createRegistry=createRegistry)
779 limited = Config(self.configFile)
780 butler1 = Butler(butlerConfig)
781 butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
782 config=Config(self.configFile), overwrite=True)
783 full = Config(self.tmpConfigFile)
784 butler2 = Butler(butlerConfig)
785 # Butlers should have the same configuration regardless of whether
786 # defaults were expanded.
787 self.assertEqual(butler1._config, butler2._config)
788 # Config files loaded directly should not be the same.
789 self.assertNotEqual(limited, full)
790 # Make sure "limited" doesn't have a few keys we know it should be
791 # inheriting from defaults.
792 self.assertIn(self.fullConfigKey, full)
793 self.assertNotIn(self.fullConfigKey, limited)
795 # Collections don't appear until something is put in them
796 collections1 = set(butler1.registry.queryCollections())
797 self.assertEqual(collections1, set())
798 self.assertEqual(set(butler2.registry.queryCollections()), collections1)
800 # Check that a config with no associated file name will not
801 # work properly with relocatable Butler repo
802 butlerConfig.configFile = None
803 with self.assertRaises(ValueError):
804 Butler(butlerConfig)
806 with self.assertRaises(FileExistsError):
807 Butler.makeRepo(self.root, standalone=True, createRegistry=False,
808 config=Config(self.configFile), overwrite=False)
810 def testStringification(self):
811 butler = Butler(self.tmpConfigFile, run="ingest")
812 butlerStr = str(butler)
814 if self.datastoreStr is not None:
815 for testStr in self.datastoreStr:
816 self.assertIn(testStr, butlerStr)
817 if self.registryStr is not None:
818 self.assertIn(self.registryStr, butlerStr)
820 datastoreName = butler.datastore.name
821 if self.datastoreName is not None:
822 for testStr in self.datastoreName:
823 self.assertIn(testStr, datastoreName)
class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For POSIXDatastore this test is equivalent
        to `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        """Test that file templates place datasets at the expected paths and
        that template validation catches a non-unique template.
        """
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create three almost-identical DatasetTypes; metric1 and metric2
        # will use the default template.
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        # dataId1 deliberately uses a numpy integer for the visit value.
        dataId1 = {"instrument": "DummyCamComp", "visit": np.int64(423)}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions. This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        # A second put with the same non-unique template must collide.
        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Create a populated repo, export its datasets, import them into a
        fresh repo, and verify every dataset exists in the new repo.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets.  We used TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the file
        # object) from any of tempfile's temporary-file context managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-exist, add tests
            # for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            # The import block is nested inside the export block because the
            # export file must still exist when it is read below.
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, output_run="ingest/run", export_file=f,
                                        directory=exportButler.datastore.root, transfer="symlink")
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    # Butler configuration used to create the test repository.
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    # Key expected only in a fully-expanded (standalone) config; see
    # testMakeRepo.
    fullConfigKey = ".datastore.formatters"
    # validateConfiguration() is expected to be able to raise for this butler.
    validationCanFail = True
    # Substrings expected in str(butler) for the datastore part.
    datastoreStr = ["/tmp"]
    # Substrings expected in butler.datastore.name.
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    # Substring expected in str(butler) for the registry part.
    registryStr = "/gen3.sqlite3"
class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    # None disables the makeRepo test: there is no file-system config to
    # expand for an in-memory datastore.
    fullConfigKey = None
    # No temporary directory root is needed.
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # Override the inherited ingest test with a no-op.
        # NOTE(review): presumably file ingest is not applicable to an
        # in-memory datastore — confirm against InMemoryDatastore docs.
        pass
class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization: an in-memory datastore chained with
    two POSIX datastores."""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    # Index 1 reaches into the second child datastore's configuration.
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"
class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        """Create a repo in one directory, then relocate its config to a
        second directory with an explicit "root" key pointing back at the
        first.
        """
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToFile(configFile2)
        # Remove the original so every test must resolve through the moved
        # config file.
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        """Sanity-check the file layout established by setUp."""
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        """Make a repo in ``self.root`` but write its config file into a
        separate directory via the ``outfile`` argument."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        # Remove the external config directory as well as whatever the
        # base class cleans up.
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        """Check that the externally-written config records the repo root as
        a proper URI rather than a plain path."""
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")
class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test makeRepo with ``outfile`` given as a directory rather than a
    file name."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Pass a bare directory as outfile; the config file is created
        # inside it (as butler.yaml — see testConfigExistence).
        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()
class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test makeRepo with ``outfile`` given as a URI string rather than a
    plain file-system path."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        # Express the outfile as a URL via ButlerURI instead of a path.
        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)
@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read from
    the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    # BUGFIX: this was a plain string literal missing the f-prefix, so it
    # contained the literal text "{bucketName}/{root}" instead of the
    # interpolated values (compare datastoreStr above).
    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string (plus trailing slash) to serve
        as a root name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(random.choices(string.ascii_uppercase + string.digits, k=20))
        return rndstr + "/"

    def setUp(self):
        """Create a mocked S3 bucket and make a butler repo inside it."""
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        # BUGFIX: assign a list (as in every other test case) rather than a
        # bare string; testStringification iterates this attribute, and a
        # string would degrade it to per-character membership checks.
        self.datastoreStr = [f"datastore={self.root}"]
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        """Empty and delete the mocked bucket and drop dummy credentials."""
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        # The same bucket handle is reused for deletion (previously it was
        # redundantly re-fetched here).
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location. For S3Datastore this test is equivalent to
        `lsst.daf.butler.core.s3utils.s3checkFileExists` call.
        """
        uri = ButlerURI(root)
        uri.updateFile(relpath)
        return s3CheckFileExists(uri)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        super().testImportExport()
# Allow the test suite to be run directly as a script.
if __name__ == "__main__":
    unittest.main()