# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import os
import posixpath
import unittest
import unittest.mock  # used by the patch.dict decorators below
import tempfile
import shutil
import pickle
import string
import random
import time
import socket

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None
    botocore = None  # keep defined so the except path leaves no dangling name

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported.
        """
        return cls
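
# mock_s3 is applied unconditionally to the S3 test case below; when moto is
# unavailable the no-op fallback above returns the class unchanged, and the
# skipIf(not boto3, ...) guard keeps those tests from running.  A minimal
# sketch of the same optional-dependency pattern, using a hypothetical
# package name for illustration:
#
#     try:
#         from fancymock import fancy_decorator
#     except ImportError:
#         def fancy_decorator(cls):  # no-op stand-in
#             return cls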

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
from threading import Thread
from tempfile import gettempdir
from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core.s3utils import (setAwsEnvCredentials,
                                          unsetAwsEnvCredentials)
from lsst.daf.butler.core.webdavutils import isWebdavEndpoint
from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")
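

# Config supports hierarchical keys: the tuple ("datastore", "records",
# "table") used above addresses the nested value datastore -> records ->
# table, equivalent to the dotted-string form used elsewhere in this file
# (e.g. ".datastore.formatters").  A minimal sketch, assuming only
# daf_butler's Config:
#
#     c = Config({"datastore": {"records": {"table": "x"}}})
#     assert c[("datastore", "records", "table")] == "x"
#     assert c[".datastore.records.table"] == "x"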


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Construction should have registered the run and tagged collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Try to create one that will have a name that is too long
        with self.assertRaises(Exception) as cm:
            self.addDatasetType("DatasetTypeNameTooLong" * 50, dimensions, storageClass, butler.registry)
        self.assertIn("check constraint", str(cm.exception).lower())

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler
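
    # The loop above exercises the three equivalent ways a dataset can be
    # addressed in Butler.put, get, and datasetExists: a DatasetRef on its
    # own, a dataset type name plus data ID, and a DatasetType instance plus
    # data ID.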

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")
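
    # getURIs() encodes the disassembly contract checked above: a composite
    # stored intact yields a primary URI and an empty component mapping, a
    # disassembled composite yields no primary URI and one URI per component,
    # and locations that are only predicted (not yet written) carry a
    # "predicted" URI fragment.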

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)
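
    # A FileDataset may carry several DatasetRefs that point at one file, as
    # in the multi-detector ingest above: both refs resolve to the same URI,
    # and unstoring one ref must not delete the file out from under the
    # other.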

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])
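
    # In summary, pruneCollection() requires purge=True and unstore=True to
    # delete a RUN collection, refuses purge for TAGGED and CHAINED
    # collections, rolls back completely when a RUN is still referenced by a
    # chain, and with unstore=True removes datastore artifacts only for
    # datasets reachable through the collection being deleted.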

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying the registry can still
        # return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)
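
    # butler.transaction() spans registry and datastore together: the
    # exception raised inside the context manager above rolls back the
    # dimension inserts, the dataset entry, and the stored file as one unit.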

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        createRegistry = not self.useTempRoot
        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile),
                                       createRegistry=createRegistry)
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo.  It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with tempfile.TemporaryDirectory() as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again.  This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements
                # even though there aren't any in these datasets or in the
                # database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand.
                # Functions in the script folder are generally considered
                # protected and should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, output_run="ingest/run", export_file=f,
                                        directory=exportDir, transfer="auto", skip_dimensions=None)
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])


class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with tempfile.TemporaryDirectory(dir=TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
        # In-memory datastores cannot ingest files, so skip the inherited
        # ingest test.
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1/,", "/PosixDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of the repo, with
    the outfile given as a directory, works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file, else the Config constructor does not know the
        # file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of the repo, with
    the outfile given as a URI, works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests.  The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20-character randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # moto needs to know that we expect Bucket bucketName to exist
        # (this used to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable - pass.
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()
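

# Under @mock_s3 the boto3 calls above never reach AWS: moto intercepts them
# and keeps bucket and object state in memory for the duration of each test,
# which is why the dummy credentials set up in setUp are sufficient.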


@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, {"WEBDAV_AUTH_METHOD": "TOKEN",
                                       "WEBDAV_BEARER_TOKEN": "XXXXXX"})
class WebdavDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore
    + a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens.  Automatically chosen
    at setUpClass via the _getfreeport() method.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20-character randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"WebdavDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run."""

    stopWebdavServer = False
    """This flag will cause the webdav server to
    gracefully shut down when True.
    """

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as the inherited class
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which tests will be run
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start
        time.sleep(3)

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server
        cls.stopWebdavServer = True
        # Wait for the thread to exit
        cls.serverThread.join()

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"WEBDAV_AUTH_METHOD": "TOKEN",
                                           "WEBDAV_BEARER_TOKEN": "XXXXXX"})
    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"WebdavDatastore@{self.rooturi}"]

        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"WEBDAV_AUTH_METHOD": "TOKEN",
                                           "WEBDAV_BEARER_TOKEN": "XXXXXX"})
    def tearDown(self):
        # Clear temporary directory
        ButlerURI(self.rooturi).remove()

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Start a local webdav-compatible HTTP server listening on the
        given port at http://localhost.

        This server only runs while this test class is instantiated,
        and then shuts down.  Must be started in a separate thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen.
        stopWebdavServer : callable
            Zero-argument callable that returns `True` when the server
            should shut down gracefully.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # Watch stopWebdavServer, and gracefully
            # shut down the server when True
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    @staticmethod
    def _getfreeport():
        """Determine a free port using sockets.
        """
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(("0.0.0.0", 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
        free_socket.close()
        return port
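
    # Note: binding to port 0 asks the OS for an ephemeral free port.  The
    # socket is closed before the webdav server rebinds the port, so there is
    # a small race window in which another process could claim it; that is
    # acceptable for these tests.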


if __name__ == "__main__":
    unittest.main()