# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""
import gc
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import socket
import string
import tempfile
import time
import unittest
import unittest.mock  # needed for unittest.mock.patch.dict used below
from tempfile import gettempdir
from threading import Thread

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported."""
        return cls
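
# With this fallback, a test class can be decorated with ``@mock_s3``
# unconditionally: when moto is available the decorator intercepts S3 calls,
# and when it is not the class is returned unchanged. Hypothetical usage:
#
#     @mock_s3
#     class S3ButlerTestCase(unittest.TestCase):
#         ...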

try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql
except ImportError:
    testing = None

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None
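
# These module-level sentinels (``boto3``, ``testing``, ``WsgiDAVApp``) end up
# as None when an optional dependency is missing; presumably the tests that
# need S3, PostgreSQL or WebDAV are skipped elsewhere in this file based on
# them (e.g. via ``unittest.skipIf``).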

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import _is_webdav_endpoint
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )
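    # The three positional arguments above become the ``summary``, ``output``
    # and ``data`` attributes of MetricsExample; assertions throughout this
    # file access them by those names.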


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None
    default_run = "ingésτ😺"

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it"""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType
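
    # The repository prepared by create_butler() above (instrument
    # "DummyCamComp", physical_filter "d-r", visits 423/424/425) is relied on
    # by the put/get tests that follow.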

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to per-test run collections, and those
        # same runs are searched when looking the datasets up again.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                # Can the artifacts themselves be retrieved?
                if not butler.datastore.isEphemeral:
                    root_uri = ResourcePath(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)

                            # when path is not preserved there should not be
                            # any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know they are
                # empty
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
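
        # Note: the "slice" parameter is applied on read by the storage class
        # read machinery; the stored artifact is unchanged, which is why only
        # ``data`` differs while ``summary`` and ``output`` round-trip intact.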

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)
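
            # "counter" is a derived component: it is not stored separately
            # but is computed on read from the parent composite, so read
            # parameters (the slice above) affect it too.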

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present
        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
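                # The dumped index is just a label -> config-URI mapping; in
                # YAML form it would look roughly like (values illustrative):
                #
                #     label: /path/to/repo/butler.yaml
                #     bad_label: s3://bucket/not_real.yaml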
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self):
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.getDirect(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDirectDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDirectDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self):
        """Test python type coercion on Butler.get and put."""

        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.getDirect(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.getDirect(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.getDirect(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the wrong dataset type definition using get()
        # rather than getDirect(). This should be consistent with getDirect()
        # behavior and return the type of the DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")
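
        # Each FileDataset pairs an existing file with the ref(s) it should
        # provide and the formatter used to read it; ingest() records the
        # datasets in registry and, with transfer="copy", copies the files
        # into the datastore.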

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            butler.ingest(*datasets, transfer="move", record_validation_info=False)
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        existence = butler.datastore.knows_these([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.knows(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry entries are not created
        # for components but querying them can return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)
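
        # The dataId above never names "exposure" directly; the butler uses
        # the exposure dimension records inserted earlier to resolve
        # seq_num + day_obs to the exposure ID on both put and get.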


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)
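
    # Note on the template checks above: fields like ``{visit.name}`` pull
    # values from dimension records attached to the ref, and a trailing
    # ``:?`` marks a field as optional, so an unknown record attribute only
    # logs a message instead of raising (hence the ``_fits`` path with no
    # value substituted).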

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we only forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())
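
    # Illustrative sketch (not part of the original test suite): the
    # removeRuns contract verified above. With unstore=True both the
    # registry entries and the file artifacts go away; with unstore=False
    # the registry forgets the run but the artifacts stay on disk.
    def _sketchRemoveRunsContract(self, butler, ref_unstored, ref_forgotten):
        uri_unstored = butler.getURI(ref_unstored)
        uri_forgotten = butler.getURI(ref_forgotten)
        butler.removeRuns([ref_unstored.run], unstore=True)
        butler.removeRuns([ref_forgotten.run], unstore=False)
        assert not uri_unstored.exists()  # artifact deleted
        assert uri_forgotten.exists()  # artifact left behind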


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testPathConstructor(self):
        """Independent test of constructor using PathLike."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object with the butler yaml
        path = pathlib.Path(self.tmpConfigFile)
        butler = Butler(path, writeable=False)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object without the butler yaml
        # (making sure we skip it if the tmp config doesn't end
        # in butler.yaml -- which is the case for a subclass)
        if self.tmpConfigFile.endswith("butler.yaml"):
            path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
            butler = Butler(path, writeable=False)
            self.assertIsInstance(butler, Butler)

    def testExportTransferCopy(self):
        """Test local export using several local file-transfer modes."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system.
            self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(
                        self.checkFileExists(exportDir, path),
                        f"Check that mode {transfer} exported files",
                    )

    def testPruneDatasets(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Simple prune.
        butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1)

        # Put data back.
        ref1 = butler.put(metric, ref1.unresolved(), run=run1)
        ref2 = butler.put(metric, ref2.unresolved(), run=run2)
        ref3 = butler.put(metric, ref3.unresolved(), run=run1)

        # Check that in normal mode, deleting the record will lead to
        # trash not touching the file.
        uri1 = butler.datastore.getURI(ref1)
        butler.datastore.bridge.moveToTrash([ref1], transaction=None)  # Update the dataset_location table
        butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id})
        butler.datastore.trash(ref1)
        butler.datastore.emptyTrash()
        self.assertTrue(uri1.exists())
        uri1.remove()  # Clean it up.

        # Simulate execution butler setup by deleting the datastore
        # record but keeping the file around and trusting.
        butler.datastore.trustGetRequest = True
        uri2 = butler.datastore.getURI(ref2)
        uri3 = butler.datastore.getURI(ref3)
        self.assertTrue(uri2.exists())
        self.assertTrue(uri3.exists())

        # Remove the datastore record.
        butler.datastore.bridge.moveToTrash([ref2], transaction=None)  # Update the dataset_location table
        butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id})
        self.assertTrue(uri2.exists())
        butler.datastore.trash([ref2, ref3])
        # Immediate removal for ref2 file.
        self.assertFalse(uri2.exists())
        # But ref3 has to wait for the empty.
        self.assertTrue(uri3.exists())
        butler.datastore.emptyTrash()
        self.assertFalse(uri3.exists())

        # Clear out the datasets from registry.
        butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
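
    # Illustrative sketch (not part of the original test suite): the
    # two-phase deletion contract exercised above. trash() only marks an
    # artifact for removal; the file disappears when emptyTrash() runs.
    def _sketchTrashContract(self, butler, ref):
        uri = butler.datastore.getURI(ref)
        butler.datastore.trash([ref])  # phase 1: mark only
        assert uri.exists()  # artifact still present
        butler.datastore.emptyTrash()  # phase 2: actually delete
        assert not uri.exists()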

    def testPytypeCoercion(self):
        """Test python type coercion on Butler.get and put."""

        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler = self.runPutGetTest(storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}
        metric = butler.get(datasetTypeName, dataId=dataId)
        self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample")

        datasetType_ori = butler.registry.getDatasetType(datasetTypeName)
        self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents")

        # Now need to hack the registry dataset type definition.
        # There is no API for this.
        manager = butler.registry._managers.datasets
        manager._db.update(
            manager._static.dataset_type,
            {"name": datasetTypeName},
            {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"},
        )

        # Force reset of dataset type cache
        butler.registry.refresh()

        datasetType_new = butler.registry.getDatasetType(datasetTypeName)
        self.assertEqual(datasetType_new.name, datasetType_ori.name)
        self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel")

        metric_model = butler.get(datasetTypeName, dataId=dataId)
        self.assertNotEqual(type(metric_model), type(metric))
        self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel")

        # Put the model and read it back to show that everything now
        # works as normal.
        metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424)
        metric_model_new = butler.get(metric_ref)
        self.assertEqual(metric_model_new, metric_model)

        # Hack the storage class again to something that will fail on the
        # get with no conversion class.
        manager._db.update(
            manager._static.dataset_type,
            {"name": datasetTypeName},
            {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"},
        )
        butler.registry.refresh()

        with self.assertRaises(ValueError):
            butler.get(datasetTypeName, dataId=dataId)
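
    # Illustrative sketch (not part of the original test suite): the rule the
    # coercion test relies on. The storage class declared in the registry,
    # not the Python type supplied at put time, determines the type that
    # get() returns; the get fails if no converter is defined.
    def _sketchStorageClassCoercion(self, butler, datasetTypeName, dataId):
        declared = butler.registry.getDatasetType(datasetTypeName).storageClass
        obj = butler.get(datasetTypeName, dataId=dataId)
        assert isinstance(obj, declared.pytype)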


@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler using Postgres."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "PostgreSQL@test"

    @staticmethod
    def _handler(postgresql):
        engine = sqlalchemy.engine.create_engine(postgresql.url())
        with engine.begin() as connection:
            connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))

    @classmethod
    def setUpClass(cls):
        # Create the postgres test server.
        cls.postgresql = testing.postgresql.PostgresqlFactory(
            cache_initialized_db=True, on_initialized=cls._handler
        )
        super().setUpClass()

    @classmethod
    def tearDownClass(cls):
        # Clean up any lingering SQLAlchemy engines/connections
        # so they're closed before we shut down the server.
        gc.collect()
        cls.postgresql.clear_cache()
        super().tearDownClass()

    def setUp(self):
        self.server = self.postgresql()

        # Need to add a registry section to the config.
        self._temp_config = False
        config = Config(self.configFile)
        config["registry", "db"] = self.server.url()
        with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh:
            config.dump(fh)
            self.configFile = fh.name
            self._temp_config = True
        super().setUp()

    def tearDown(self):
        self.server.stop()
        if self._temp_config and os.path.exists(self.configFile):
            os.remove(self.configFile)
        super().tearDown()

    def testMakeRepo(self):
        # The base class test assumes that it's using sqlite and assumes
        # the config file is acceptable to sqlite.
        raise unittest.SkipTest("Postgres config is not compatible with this test.")
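

# Illustrative sketch (not part of the original test suite): the
# testing.postgresql lifecycle that PostgresPosixDatastoreButlerTestCase
# follows above. Assumes testing.postgresql imported successfully; the
# factory caches an initialized template database so per-test servers
# start quickly.
def _examplePostgresLifecycle():
    factory = testing.postgresql.PostgresqlFactory(cache_initialized_db=True)
    server = factory()  # per-test server, as in setUp
    engine = sqlalchemy.engine.create_engine(server.url())
    with engine.begin() as connection:
        connection.execute(sqlalchemy.text("SELECT 1"))  # stand-in for real work
    server.stop()  # per-test cleanup, as in tearDown
    factory.clear_cache()  # once per class, as in tearDownClass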


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler."""

    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"

    def testIngest(self):
        # File ingest does not apply to an in-memory datastore, so disable
        # the inherited test.
        pass


class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler."""

    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
    datastoreName = [
        "InMemoryDatastore@",
        f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
        "SecondDatastore",
    ]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        os.makedirs(self.dir2, exist_ok=True)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ResourcePath(c["root"])
        uri_expected = ResourcePath(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works
    when the outfile is given as a directory.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file name, since otherwise the Config constructor
        # cannot determine the file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of repo works
    when the outfile is given as a URI.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests. The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used when useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
    """The expected format of the S3 Datastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    mock_s3 = mock_s3()
    """The mocked s3 interface from moto."""

    def genRoot(self):
        """Return a random 20-character string to serve as the root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20))
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ResourcePath(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Enable S3 mocking of tests.
        self.mock_s3.start()

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        # Moto needs to know that we expect the bucket to exist
        # (this used to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable - pass.
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Stop the S3 mock.
        self.mock_s3.stop()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

        if self.useTempRoot and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

        super().tearDown()
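

# Illustrative sketch (not part of the original test suite): the moto setup
# pattern S3DatastoreButlerTestCase uses above. Assumes moto's mock_s3 was
# imported successfully; the bucket name is a hypothetical placeholder.
# Inside start()/stop(), boto3 talks to an in-process fake S3, so the bucket
# must be created before any butler I/O.
def _exampleMotoSetup():
    mock = mock_s3()
    mock.start()
    usingDummy = setAwsEnvCredentials()  # fake credentials so boto3 can sign requests
    boto3.resource("s3").create_bucket(Bucket="example-bucket")
    # ... exercise S3-backed butler code here ...
    if usingDummy:
        unsetAwsEnvCredentials()
    mock.stop()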


@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
    a local in-memory SqlRegistry.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests."""

    portNumber = 8080
    """Port on which the webdav server listens. Automatically chosen
    at setUpClass via the _getfreeport() method.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used when useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["FileDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the Registry string."""

    serverThread = None
    """Thread in which the local webdav server will run."""

    stopWebdavServer = False
    """This flag will cause the webdav server to
    gracefully shut down when set to True.
    """

    def genRoot(self):
        """Return a random 20-character string to serve as the root
        name for the temporary repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20))
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as inherited class
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which tests will be run
        cls.serverThread = Thread(
            target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True
        )
        cls.serverThread.start()
        # Wait for it to start
        time.sleep(3)

    @classmethod
    def tearDownClass(cls):
        # Ask for graceful shut down of the webdav server
        cls.stopWebdavServer = True
        # Wait for the thread to exit
        cls.serverThread.join()
        super().tearDownClass()

    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        # need local folder to store registry database
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{self.rooturi}"]

        if not _is_webdav_endpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    def tearDown(self):
        # Clear temporary directory
        ResourcePath(self.rooturi).remove()
        ResourcePath(self.rooturi).session.close()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

        if self.useTempRoot and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

        super().tearDown()

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Start a local webdav-compatible HTTP server listening on
        http://localhost:port. The server runs only while this test class
        is active, then shuts down. It must be started in a separate thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {"domain_controller": None},
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # Watch stopWebdavServer, and gracefully
            # shut down the server when it becomes True
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    @staticmethod
    def _getfreeport():
        """Determine a free port number using sockets.

        The port is only guaranteed to have been free at the moment it was
        probed; it could be taken again before the caller binds to it.
        """
        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(("127.0.0.1", 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
        free_socket.close()
        return port
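
    # Illustrative sketch (not part of the original test suite): an
    # alternative that avoids the probe-then-bind race by binding to port 0
    # and asking the OS which port it assigned; the socket stays open until
    # it can be handed to the server.
    @staticmethod
    def _sketchBindEphemeralPort():
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.bind(("127.0.0.1", 0))  # port 0: the OS picks a free port
        port = sock.getsockname()[1]
        return sock, port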


class PosixDatastoreTransfers(unittest.TestCase):
    """Test data transfers between butlers.

    Test for different managers. UUID to UUID and integer to integer are
    tested. UUID to integer is not supported since we do not currently
    want to allow that. Integer to UUID is supported with the caveat
    that UUID4 will be generated and this will be incorrect for raw
    dataset types. The test ignores that.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config(self.configFile)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, manager, label):
        config = Config(self.configFile)
        config["registry", "managers", "datasets"] = manager
        return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True)
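
    # Illustrative sketch (not part of the original test suite): what the
    # manager string passed to create_butler controls. The fully qualified
    # class name stored under "registry.managers.datasets" selects integer
    # versus UUID dataset IDs and must be fixed at makeRepo time.
    def _sketchUuidManagerRepo(self):
        config = Config(self.configFile)
        config["registry", "managers", "datasets"] = (
            "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
        )
        return Butler(Butler.makeRepo(f"{self.root}/butlerUUID", config=config), writeable=True)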

    def create_butlers(self, manager1, manager2):
        self.source_butler = self.create_butler(manager1, "1")
        self.target_butler = self.create_butler(manager2, "2")

    def testTransferUuidToUuid(self):
        self.create_butlers(
            "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
            "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
        )
        # Setting id_gen_map should have no effect here
        self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})

    def testTransferIntToInt(self):
        with self.assertWarns(FutureWarning):
            self.create_butlers(
                "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager",
                "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager",
            )
        # int dataset ID only allows UNIQUE
        self.assertButlerTransfers()

    def testTransferIntToUuid(self):
        with self.assertWarns(FutureWarning):
            self.create_butlers(
                "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager",
                "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
            )
        self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})

    def testTransferMissing(self):
        """Test transfers where datastore records are missing.

        This is how execution butler works.
        """
        self.create_butlers(
            "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
            "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
        )

        # Configure the source butler to allow trust.
        self.source_butler.datastore.trustGetRequest = True

        self.assertButlerTransfers(purge=True)

    def testTransferMissingDisassembly(self):
        """Test transfers where datastore records are missing, using a
        composite storage class that triggers disassembly.

        This is how execution butler works.
        """
        self.create_butlers(
            "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
            "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
        )

        # Configure the source butler to allow trust.
        self.source_butler.datastore.trustGetRequest = True

        # Test disassembly.
        self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite")

    def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"):
        """Test that a run can be transferred to another butler."""

        storageClass = self.storageClassFactory.getStorageClass(storageClassName)
        datasetTypeName = "random_data"

        # Test will create 3 collections and we will want to transfer
        # two of those three.
        runs = ["run1", "run2", "other"]

        # Also want to use two different dataset types to ensure that
        # grouping works.
        datasetTypeNames = ["random_data", "random_data_2"]

        # Create the run collections in the source butler.
        for run in runs:
            self.source_butler.registry.registerCollection(run, CollectionType.RUN)

        # Create dimensions in source butler.
        n_exposures = 30
        self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        self.source_butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        self.source_butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        for i in range(n_exposures):
            self.source_butler.registry.insertDimensionData(
                "exposure",
                {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"},
            )

        # Create dataset types in the source butler.
        dimensions = self.source_butler.registry.dimensions.extract(["instrument", "exposure"])
        for datasetTypeName in datasetTypeNames:
            datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
            self.source_butler.registry.registerDatasetType(datasetType)

        # Write a dataset to an unrelated run -- this will ensure that
        # we are rewriting integer dataset ids in the target if necessary.
        # Will not be relevant for UUID.
        run = "distraction"
        butler = Butler(butler=self.source_butler, run=run)
        butler.put(
            makeExampleMetrics(),
            datasetTypeName,
            exposure=1,
            instrument="DummyCamComp",
            physical_filter="d-r",
        )

        # Write some example metrics to the source.
        butler = Butler(butler=self.source_butler)

        # Set of DatasetRefs that should be in the list of refs to transfer
        # but which will not be transferred.
        deleted = set()

        n_expected = 20  # Number of datasets expected to be transferred
        source_refs = []
        for i in range(n_exposures):
            # Put a third of the datasets into each collection; only refs
            # from two of the three collections are retained for transfer.
            index = i % 3
            run = runs[index]
            datasetTypeName = datasetTypeNames[i % 2]

            metric_data = {
                "summary": {"counter": i},
                "output": {"text": "metric"},
                "data": [2 * x for x in range(i)],
            }
            metric = MetricsExample(**metric_data)
            dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)

            # Remove the datastore record using low-level API.
            if purge:
                # Remove records for a fraction.
                if index == 1:

                    # For one of these delete the file as well.
                    # This allows the "missing" code to filter the
                    # file out.
                    if not deleted:
                        primary, uris = butler.datastore.getURIs(ref)
                        if primary:
                            primary.remove()
                        for uri in uris.values():
                            uri.remove()
                        n_expected -= 1
                        deleted.add(ref)

                    # Remove the datastore record.
                    butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})

            if index < 2:
                source_refs.append(ref)
            if ref not in deleted:
                new_metric = butler.get(ref.unresolved(), collections=run)
                self.assertEqual(new_metric, metric)

        # Create some bad dataset types to ensure we check for inconsistent
        # definitions.
        badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList")
        for datasetTypeName in datasetTypeNames:
            datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass)
            self.target_butler.registry.registerDatasetType(datasetType)
        with self.assertRaises(ConflictingDefinitionError) as cm:
            self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map)
        self.assertIn("dataset type differs", str(cm.exception))

        # And remove the bad definitions.
        for datasetTypeName in datasetTypeNames:
            self.target_butler.registry.removeDatasetType(datasetTypeName)

        # Transfer without creating dataset types should fail.
        with self.assertRaises(KeyError):
            self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map)

        # Transfer without creating dimensions should fail.
        with self.assertRaises(ConflictingDefinitionError) as cm:
            self.target_butler.transfer_from(
                self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True
            )
        self.assertIn("dimension", str(cm.exception))

        # The failed transfer above leaves registry in an inconsistent
        # state because the run is created but then rolled back without
        # the collection cache being cleared. For now force a refresh.
        # Can remove with DM-35498.
        self.target_butler.registry.refresh()

        # Now transfer them to the second butler, including dimensions.
        with self.assertLogs(level=logging.DEBUG) as cm:
            transferred = self.target_butler.transfer_from(
                self.source_butler,
                source_refs,
                id_gen_map=id_gen_map,
                register_dataset_types=True,
                transfer_dimensions=True,
            )
        self.assertEqual(len(transferred), n_expected)
        log_output = ";".join(cm.output)
        self.assertIn("found in datastore for chunk", log_output)
        self.assertIn("Creating output run", log_output)

        # Do the transfer twice to ensure that it will do nothing extra.
        # Only do this if purge=True because it does not work for int
        # dataset_id.
        if purge:
            # This should not need to register dataset types.
            transferred = self.target_butler.transfer_from(
                self.source_butler, source_refs, id_gen_map=id_gen_map
            )
            self.assertEqual(len(transferred), n_expected)

        # Also do an explicit low-level transfer to trigger some
        # edge cases.
        with self.assertLogs(level=logging.DEBUG) as cm:
            self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs)
        log_output = ";".join(cm.output)
        self.assertIn("no file artifacts exist", log_output)

        with self.assertRaises(TypeError):
            self.target_butler.datastore.transfer_from(self.source_butler, source_refs)

        with self.assertRaises(ValueError):
            self.target_butler.datastore.transfer_from(
                self.source_butler.datastore, source_refs, transfer="split"
            )

        # Now try to get the same refs from the new butler.
        for ref in source_refs:
            if ref not in deleted:
                unresolved_ref = ref.unresolved()
                new_metric = self.target_butler.get(unresolved_ref, collections=ref.run)
                old_metric = self.source_butler.get(unresolved_ref, collections=ref.run)
                self.assertEqual(new_metric, old_metric)

        # Now prune run2 collection and create instead a CHAINED collection.
        # This should block the transfer.
        self.target_butler.pruneCollection("run2", purge=True, unstore=True)
        self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED)
        with self.assertRaises(CollectionTypeError):
            # Re-importing the run1 datasets can be problematic if they
            # use integer IDs so filter those out.
            to_transfer = [ref for ref in source_refs if ref.run == "run2"]
            self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map)
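

# Illustrative sketch (not part of the original test suite): the happy-path
# transfer that assertButlerTransfers builds up to. Both butlers are assumed
# to use compatible dataset-ID managers; dataset types and dimension records
# are created in the target on demand.
def _exampleTransferFrom(source_butler, target_butler):
    # Select every dataset in every collection of the source.
    refs = list(source_butler.registry.queryDatasets(..., collections=...))
    transferred = target_butler.transfer_from(
        source_butler,
        refs,
        register_dataset_types=True,  # create missing dataset types in the target
        transfer_dimensions=True,  # copy the dimension records the refs need
    )
    return transferred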


if __name__ == "__main__":
    unittest.main()