# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""

import os
import posixpath
import unittest
import unittest.mock  # needed for the mock.patch.dict decorators used below
import tempfile
import shutil
import pickle
import string
import random
import time
import socket

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 cannot be imported.
        """
        return cls
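
# ``mock_s3`` is applied below as a class decorator on
# S3DatastoreButlerTestCase.  When moto is unavailable the no-op fallback
# above leaves the test class untouched, and the accompanying
# ``@unittest.skipIf(not boto3, ...)`` guard skips those tests entirely.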

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
from threading import Thread
from tempfile import gettempdir
from lsst.utils import doImport
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
                                                     unsetAwsEnvCredentials)
from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    """Return an example MetricsExample instance used throughout the tests."""
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests from different
    butler configurations."""

    root = None
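
    # Concrete subclasses are expected to provide, at class level or in
    # setUp(): ``configFile`` (the butler config to load) and
    # ``tmpConfigFile`` (the config used to construct test butlers), plus
    # the attributes consumed by ButlerTests such as ``fullConfigKey``,
    # ``validationCanFail``, ``datastoreStr``, ``datastoreName``,
    # ``registryStr`` and ``useTempRoot``.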

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to a run collection, which is also the
        # default collection searched when looking datasets up again.
        run = "ingest"
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric,
                                             collections=this_run)

                # Can the artifacts themselves be retrieved?
                if not butler.datastore.isEphemeral:
                    root_uri = ButlerURI(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        transferred = butler.retrieveArtifacts([ref], destination,
                                                               preserve_path=preserve_path)
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ButlerURI.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)

                            # When path is not preserved there should not be
                            # any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(len(artifacts), n_uris, "Comparing expected artifacts vs actual:"
                                         f" {artifacts} vs {primary_uri} and {secondary_uris}")

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts([ref], destination,
                                                                         preserve_path=preserve_path,
                                                                         overwrite=True)
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know they are
                # empty
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original run collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(
            butler2.collections,
            CollectionSearch.fromExpression(["other"])
        )
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not created
        # for its components, but querying with components=True can still
        # return them.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" is missing the keys we know it should be
        # inheriting from defaults (and that "full" has them).
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo.  It does not assume a posix
        datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again.  This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements
                # even though there aren't any in these datasets or in the
                # database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f, directory=exportDir,
                                        transfer="auto", skip_dimensions=None, reuse_ids=False)
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                    for path in pathsInStore:
                        self.assertTrue(self.checkFileExists(exportDir, path),
                                        f"Check that mode {transfer} exported files")


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"

    def testIngest(self):
        # An in-memory datastore can not ingest files, so disable the
        # inherited ingest test.
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that makeRepo can write its config to a directory outside the
    repo."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file else Config constructor does not know the file
        # type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that makeRepo can write its config to a URI outside the repo."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the Bucket that will be used in the tests. The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"FileDatastore@s3://{bucketName}/{root}"]
    """The expected format of the S3 Datastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string (with a trailing slash) to
        serve as a root name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        # MOTO needs to know that we expect Bucket bucketname to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable - pass.
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

        if self.useTempRoot and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
# Mock required environment variables during tests
@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                       "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                           TESTDIR, "config/testConfigs/webdav/token"),
                                       "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests.
    """

    portNumber = 8080
    """Port on which the webdav server listens. Automatically chosen
    at setUpClass via the _getfreeport() method.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used in case useTempRoot=False.
    Otherwise the root is set to a 20 characters long randomly generated string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"FileDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run."""

    stopWebdavServer = False
    """This flag will cause the webdav server to
    gracefully shut down when True.
    """

    def genRoot(self):
        """Return a random 20-character string (with a trailing slash) to
        serve as a root name for the temporary repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as inherited class
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which tests will be run
        cls.serverThread = Thread(target=cls._serveWebdav,
                                  args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
                                  daemon=True)
        cls.serverThread.start()
        # Wait for it to start
        time.sleep(3)
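        # The fixed sleep is only a coarse readiness wait; setUp() below
        # additionally verifies the endpoint with isWebdavEndpoint() before
        # any test runs.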

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server
        cls.stopWebdavServer = True
        # Wait for the thread to exit
        cls.serverThread.join()

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{self.rooturi}"]

        if not isWebdavEndpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    # Mock required environment variables during tests
    @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
                                           "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
                                               TESTDIR, "config/testConfigs/webdav/token"),
                                           "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
    def tearDown(self):
        # Clear temporary directory
        ButlerURI(self.rooturi).remove()
        ButlerURI(self.rooturi).session.close()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

        if self.useTempRoot and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Start a local webdav-compatible HTTP server listening on the
        given port.  The server runs only while this test class is active
        and is then shut down.  Must be started in a separate thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen.
        stopWebdavServer : callable
            A callable that returns `True` when the server should shut down.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {
                "domain_controller": None
            },
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
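        # With the simple domain controller mapping "*" to True, wsgidav
        # accepts any user without credentials, which is sufficient for
        # these local-only tests.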
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # Watch stopWebdavServer, and gracefully
            # shut down the server when True
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    @staticmethod
    def _getfreeport():
        """Determine a free port using sockets.
        """
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(("0.0.0.0", 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
        free_socket.close()
        return port


if __name__ == "__main__":
    unittest.main()