Coverage for tests/test_butler.py: 19%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""
import os
import posixpath
import unittest
import unittest.mock  # patch.dict is used below; the submodule must be imported explicitly
import tempfile
import shutil
import pickle
import string
import random
import time
import socket

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported.
        """
        return cls
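
# Note for readers: mock_s3 is applied below as a *class* decorator on
# S3DatastoreButlerTestCase, so when moto is unavailable the fallback above
# must still be a callable that returns the class unchanged. The module then
# imports cleanly, and the @unittest.skipIf(not boto3, ...) guard keeps the
# S3 tests from actually running.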

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
from threading import Thread
from tempfile import gettempdir
from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError, OrphanedRecordError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
                                                     unsetAwsEnvCredentials)
from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )
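
# As used in the tests below (metric.summary, metric.output, metric.data,
# and the slicing of metric.data), MetricsExample appears to take its three
# positional arguments in the order (summary, output, data); that reading is
# inferred from this file rather than from the class's own documentation.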


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests from different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)
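
    # A note on components: a component dataset type in daf_butler is named
    # "<parent>.<component>", so datasetType.componentTypeName("summary") for
    # a parent named "test_metric" yields "test_metric.summary". The helper
    # below uses that composed name to fetch each component through the
    # normal butler.get path and compares the result both with the reference
    # object and with a deferred-load handle.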
    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest"
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric,
                                             collections=this_run)

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(
            butler2.collections,
            CollectionSearch.fromExpression(["other"])
        )
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)
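
    # A single file backing several DatasetRefs is legitimate: the
    # FileDataset above lists two refs for one detectors.yaml file, and the
    # formatter (MultiDetectorFormatter) is expected to pull each detector's
    # slice out of the shared file. That is why the two URIs compare equal in
    # the second half of this test, and why unstoring one ref must not
    # invalidate the other.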

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Add a new dataset type and delete it
        tmpName = "prune_collections_disposable"
        tmpDatasetType = self.addDatasetType(tmpName, dimensions, storageClass,
                                             butler.registry)
        tmpFromRegistry = butler.registry.getDatasetType(tmpName)
        self.assertEqual(tmpDatasetType, tmpFromRegistry)
        butler.registry.removeDatasetType(tmpName)
        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(tmpName)
        # Removing a second time is fine
        butler.registry.removeDatasetType(tmpName)

        # Component removal is not allowed
        with self.assertRaises(ValueError):
            butler.registry.removeDatasetType(DatasetType.nameWithComponent(tmpName, "component"))

        # Try, and fail, to delete a datasetType that is associated with data
        with self.assertRaises(OrphanedRecordError):
            butler.registry.removeDatasetType(datasetType.name)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

        # Now that the collections have been pruned we can remove the
        # dataset type
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying dataset types can still
        # return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)
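
    # Everything inside butler.transaction() (the dimension inserts, the
    # registry entry created by put(), and the datastore write) is rolled
    # back together when the exception propagates; the assertions above
    # check that neither the registry nor the datastore retains any trace
    # of the dataset afterwards.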

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()
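
    # ButlerURI abstracts over URI schemes (local file, s3://, https://), so
    # this existence check works unchanged for the S3 and WebDAV
    # specializations further below; that is why os.path is deliberately
    # avoided here.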
    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements
                # even though there aren't any in these datasets or in the
                # database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f,
                                        directory=exportDir, transfer="auto", skip_dimensions=None)
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"
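
    # The inherited testIngest exercises file ingest, which an in-memory
    # datastore cannot support (see the warning noted in
    # ButlerTests.testIngest), so it is overridden with a no-op here.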
    def testIngest(self):
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works
    when the outfile is given as a directory."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works
    when the outfile is given as a URI."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"FileDatastore@s3://{bucketName}/{root}"]
    """The expected format of the S3 Datastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Returns a random string of length 20 to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # need local folder to store registry database
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # the key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)


@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                       "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                           TESTDIR, "config/testConfigs/webdav/token"),
                                       "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens. Automatically chosen
    at setUpClass via the _getfreeport() method.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"FileDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run"""

    stopWebdavServer = False
    """This flag will cause the webdav server to
    gracefully shut down when True
    """

    def genRoot(self):
        """Returns a random string of length 20 to serve as a root
        name for the temporary repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as inherited class
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which tests will be run
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start
        time.sleep(3)
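        # The fixed sleep above is only a crude readiness wait; setUp()
        # below re-checks the server with isWebdavEndpoint() and aborts the
        # tests if it is not actually up.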

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server
        cls.stopWebdavServer = True
        # Wait for the thread to exit
        cls.serverThread.join()

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        # need local folder to store registry database
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{self.rooturi}"]

        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def tearDown(self):
        # Clear temporary directory
        ButlerURI(self.rooturi).remove()
        ButlerURI(self.rooturi).session.close()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Start a local webdav-compatible HTTP server listening on the
        given port of localhost. The server only runs while this test class
        is in use and is then shut down. Must be started in a separate
        thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen.
        stopWebdavServer : `Callable`
            Callable that returns `True` when the server should shut down.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # watch stopWebdavServer, and gracefully
            # shut down the server when True
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    @staticmethod
    def _getfreeport():
        """Determine a free port using sockets.
        """
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(('0.0.0.0', 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
        free_socket.close()
        return port
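
    # Caveat: _getfreeport has an inherent race: the port is free when the
    # probe socket closes but could in principle be claimed by another
    # process before the webdav server binds it. For a test fixture this
    # small window is generally acceptable.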


if __name__ == "__main__":
    unittest.main()