# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import os
import posixpath
import unittest
import tempfile
import shutil
import pickle
import string
import random
import numpy as np

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        return cls

from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
                                          unsetAwsEnvCredentials)

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
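    """Return a small MetricsExample instance with fixed test values."""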
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests against different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it with the given registry."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
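        """Create the shared storage class factory once for the test class."""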
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
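        """Assert that each named component retrieved with butler.get matches
        the corresponding attribute of the reference object.
        """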
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
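        """Remove the temporary repository root if one was created."""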
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
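        """Exercise put/get round trips, component retrieval, get parameters
        and dataset removal, returning the populated butler so callers can
        run further checks against the repository.
        """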
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Construction should have registered the run and tagged collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Try to create one that will have a name that is too long
        with self.assertRaises(Exception) as cm:
            self.addDatasetType("DatasetTypeNameTooLong" * 50, dimensions, storageClass, butler.registry)
        self.assertIn("check constraint", str(cm.exception).lower())

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            # ref.components will only be populated in certain cases
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            self.assertTrue(compRef.hasParentId)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check that the expected collections are still present
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
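        """Test that run and collection arguments can be omitted at Butler
        construction and supplied on the individual method calls instead.
        """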
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should still leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
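        """Test that a composite dataset stored with a concrete storage class
        is written as a single file rather than being disassembled.
        """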
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
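        """Test that a composite dataset with a disassembling storage class is
        written as per-component files, except for in-memory datastores,
        which never disassemble.
        """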
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
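        """Test file ingest, both one dataset per file and multiple datasets
        sharing a single file.
        """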
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory datastores cannot
        # ingest files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
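        """Test pruning of RUN, TAGGED and CHAINED collections, with and
        without purging and unstoring the associated datasets.
        """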
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
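        """Test registration and retrieval of dataset types and their
        components, followed by configuration validation.
        """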
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are created
        # for each component.  Need entries for each component in the test
        # configuration otherwise validation won't work.  The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
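        """Test that a failed transaction rolls back both registry inserts
        and datastore writes.
        """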
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" lacks a key we know it should be inheriting
        # from defaults, while "full" has it.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
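        """Test that str(butler) reports the configured datastore and
        registry.
        """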
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Check if a file exists at a given path (relative to root).

        The test ``testPutTemplates`` verifies the actual physical existence
        of files in the requested location.  For PosixDatastore this check is
        equivalent to an `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
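        """Test that file templates from the configuration control where
        datasets are written, and that non-unique templates are rejected.
        """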
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423",
                                                      "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425",
                                                      "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use the default
        # template), plus a third whose template in the test configuration is
        # deliberately not unique.
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))
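
        # dataId1 uses a numpy integer for the visit, to check that file
        # templates also handle non-native integer types.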
        dataId1 = {"instrument": "DummyCamComp", "visit": np.int64(423)}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions.  This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
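        """Populate a repo via the put/get tests, export its datasets, then
        import them into a freshly created repo and check that they all
        exist there.
        """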
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets.  We use TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the
        # file object) from any of tempfile's temporary-file context
        # managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-export, add
            # tests for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand.  Functions
                # in the script folder are generally considered protected and
                # should not be used as public API.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, output_run="ingest/run", export_file=f,
                                        directory=exportButler.datastore.root, transfer="symlink")
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))


class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
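        # The in-memory datastore cannot ingest files, so disable the
        # inherited ingest test.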
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToFile(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo in a directory outside
    the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file name, since otherwise the Config constructor
        # does not know the file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo at a URI outside the repo
    works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests.  The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case
    useTempRoot=False.  Otherwise the root is set to a randomly generated
    20-character string during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root name for
        the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp, as it is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        The test ``testPutTemplates`` verifies the actual physical existence
        of files in the requested location.  For S3Datastore this check is
        equivalent to a `lsst.daf.butler.core.s3utils.s3CheckFileExists` call.
        """
        uri = ButlerURI(root)
        uri.updateFile(relpath)
        return s3CheckFileExists(uri)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        super().testImportExport()


if __name__ == "__main__":
    unittest.main()