# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import os
26import posixpath
27import unittest
28import tempfile
29import shutil
30import pickle
31import string
32import random
33import time
34import socket
36try:
37 import boto3
38 import botocore
39 from moto import mock_s3
40except ImportError:
41 boto3 = None
43 def mock_s3(cls):
44 """A no-op decorator in case moto mock_s3 can not be imported.
45 """
46 return cls
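
# When moto is unavailable, the no-op mock_s3 defined above keeps this module
# importable: the @mock_s3 decoration on the S3 test case below becomes
# harmless, and that test case is independently skipped via
# @unittest.skipIf(not boto3, ...).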

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
from threading import Thread
from tempfile import gettempdir
from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError, OrphanedRecordError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
                                                     unsetAwsEnvCredentials)
from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )
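
# The three positional arguments above populate what the tests read back as
# metric.summary, metric.output and metric.data (the sliceable list); this
# mapping is inferred from those accesses rather than stated by
# MetricsExample itself.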


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered in other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")
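
    # The "override_record" value comes from the override file under
    # config/testConfigs: directories passed via searchPaths take precedence
    # over the packaged defaults when ButlerConfig assembles a configuration,
    # which is why config2 differs from config1 here.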


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # Butler construction should have registered the run and tagged
        # collections, and nothing else.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run, tag}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            this_tag = f"put_tag_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            butler.registry.registerCollection(this_tag, type=CollectionType.TAGGED)
            expected_collections.update({this_run, this_tag})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run, tags=[this_tag])
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric,
                                             collections=this_run)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref], tags=[this_tag])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_tag)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(this_tag, [ref])
                butler.pruneDatasets([ref], unstore=True, tags=[this_tag])
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_tag)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError, msg=f"Checking ref {ref} not found"):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True, tags=[this_tag], run=this_run)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

            # Cleanup
            for coll in (this_run, this_tag):
                # Do explicit registry removal since we know they are
                # empty.
                butler.registry.removeCollection(coll)
                expected_collections.remove(coll)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(
            butler2.collections,
            CollectionSearch.fromExpression(["other"])
        )
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")
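
    # Together, the two composite tests above cover both storage strategies:
    # the "NoDisassembly" storage class keeps the composite in a single file
    # (one primary URI, no component URIs), while the disassembling variant
    # yields no primary URI and one URI per component.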

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Add a new dataset type and delete it
        tmpName = "prune_collections_disposable"
        tmpDatasetType = self.addDatasetType(tmpName, dimensions, storageClass,
                                             butler.registry)
        tmpFromRegistry = butler.registry.getDatasetType(tmpName)
        self.assertEqual(tmpDatasetType, tmpFromRegistry)
        butler.registry.removeDatasetType(tmpName)
        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(tmpName)
        # Removing a second time is fine
        butler.registry.removeDatasetType(tmpName)

        # Component removal is not allowed
        with self.assertRaises(ValueError):
            butler.registry.removeDatasetType(DatasetType.nameWithComponent(tmpName, "component"))

        # Try and fail to delete a datasetType that is associated with data
        with self.assertRaises(OrphanedRecordError):
            butler.registry.removeDatasetType(datasetType.name)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

        # Now that the collections have been pruned we can remove the
        # dataset type
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying can still return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()
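
    # Typical call pattern, as exercised by testPutTemplates below:
    #
    #     self.checkFileExists(butler.datastore.root,
    #                          "ingest/metric2/d-r/DummyCamComp_v423.pickle")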

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with tempfile.TemporaryDirectory() as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements
                # even though there aren't any in these datasets or in the
                # database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f,
                                        directory=exportDir, transfer="auto", skip_dimensions=None)
                importButler = Butler(importDir, run="ingest/run")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])


class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with tempfile.TemporaryDirectory(dir=TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"

    def testIngest(self):
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1/,", "/PosixDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo with a directory outfile
    outside of the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo with a URI outfile
    outside of the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests. The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"
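
    # Note that genRoot returns a trailing "/": self.root is interpolated
    # into an f"s3://{bucket}/{root}" URI below, so the slash keeps the
    # random string behaving as a key prefix (the S3 stand-in for a
    # directory) rather than part of a file name.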

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # Need a local folder to store the registry database
        self.reg_dir = tempfile.mkdtemp(dir=TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)


@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                       "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                           TESTDIR, "config/testConfigs/webdav/token"),
                                       "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
class WebdavDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens. Automatically chosen
    at setUpClass via the _getfreeport() method.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"WebdavDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run."""

    stopWebdavServer = False
    """This flag will cause the webdav server to
    gracefully shut down when True.
    """

    def genRoot(self):
        """Returns a random string of len 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as inherited class
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which tests will be run
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start
        time.sleep(3)

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server
        cls.stopWebdavServer = True
        # Wait for the thread to exit
        cls.serverThread.join()

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        # Need a local folder to store the registry database
        self.reg_dir = tempfile.mkdtemp(dir=TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"WebdavDatastore@{self.rooturi}"]

        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def tearDown(self):
        # Clear the temporary directory
        ButlerURI(self.rooturi).remove()
        ButlerURI(self.rooturi).session.close()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Starts a local webdav-compatible HTTP server listening on the
        given port of localhost. This server only runs while this test
        class is instantiated, and then shuts down. Must be started in a
        separate thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # Watch stopWebdavServer, and gracefully
            # shut down the server when it becomes True
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    @staticmethod
    def _getfreeport():
        """Determines a free port using sockets.

        Note the small race window: the probe socket is closed before the
        webdav server binds the port, so another process could grab it in
        between.
        """
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(("0.0.0.0", 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
        free_socket.close()
        return port


if __name__ == "__main__":
    unittest.main()