Coverage for tests/test_butler.py: 13% (1263 statements)
coverage.py v7.2.7, created at 2023-07-21 09:55 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

from __future__ import annotations

import gc
import json
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock  # Needed explicitly for unittest.mock.patch used below.
import uuid
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, cast

try:
    import boto3
    import botocore
    from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
    from moto import mock_s3  # type: ignore[import]
except ImportError:
    boto3 = None

    def mock_s3(cls: Any) -> Any:
        """No-op decorator in case moto mock_s3 can not be imported.

        Returns the decorated object unchanged so decorated test classes
        remain usable; the tests themselves are skipped separately when
        boto3 is missing.
        """
        return cls
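
# A hedged sketch (not part of the original file) of how such a fallback
# decorator is typically exercised; the class name and skip message are
# illustrative assumptions, since the real S3 test cases appear later in
# this file:
#
#   @mock_s3
#   @unittest.skipIf(boto3 is None, "moto/boto3 not available")
#   class HypotheticalS3ButlerTestCase(unittest.TestCase):
#       ...  # runs against the mocked S3 endpoint when moto is present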

try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    ButlerRepoIndex,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetExistence,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    import types

    from lsst.daf.butler import Datastore, DimensionGraph, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def clean_environment() -> None:
    """Remove external environment variables that affect the tests."""
    for k in (
        "DAF_BUTLER_REPOSITORY_INDEX",
        "S3_ENDPOINT_URL",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_SHARED_CREDENTIALS_FILE",
    ):
        os.environ.pop(k, None)


def makeExampleMetrics() -> MetricsExample:
    """Return example dataset suitable for tests."""
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )
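
# A hedged sketch of the round trip the helpers in this module support; the
# repo path and run name are illustrative, everything else is defined above:
#
#   butler = Butler("/path/to/repo", run="demo_run")
#   ref = butler.put(makeExampleMetrics(), "test_metric",
#                    instrument="DummyCamComp", visit=423)
#   assert butler.get(ref) == makeExampleMetrics()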


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other test
    cases.
    """

    def testSearchPath(self) -> None:
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests(TestCaseMixin):
    """Helper class for running a suite of put/get tests against different
    butler configurations.
    """

    root: str
    default_run = "ingésτ😺"
    storageClassFactory: StorageClassFactory
    configFile: str
    tmpConfigFile: str

    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGraph, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(
        self,
        butler: Butler,
        datasetRef: DatasetRef,
        components: tuple[str, ...],
        reference: Any,
        collections: Any = None,
    ) -> None:
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[Butler, DatasetType]:
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run})

        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Butler:
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]
        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                kwargs: dict[str, Any] = {}
                if not isinstance(args[0], DatasetRef):  # type: ignore
                    kwargs["run"] = this_run
                ref = butler.put(metric, *args, **kwargs)
                self.assertIsInstance(ref, DatasetRef)

                # Test get via the resolved DatasetRef (formerly getDirect)
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                # Can the artifacts themselves be retrieved?
                if not butler._datastore.isEphemeral:
                    root_uri = ResourcePath(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)
                            assert path_in_destination is not None

                            # When path is not preserved there should not be
                            # any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                kwargs = {"collections": this_run}
                if isinstance(args[0], DatasetRef):
                    kwargs = {}  # Prevent warning from being issued.
                self.assertFalse(butler.exists(*args, **kwargs))
                # get() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know the run is
                # empty.
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Create DatasetRef for put using default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        assert metric.data is not None  # for mypy
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises(FileNotFoundError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Put the same dataset again, so we can check that a duplicate put
        # fails.
        ref = butler.put(metric, datasetType, dataId)

        # Repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with a resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # With a resolved ref this write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present.

        return butler
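
    # retrieveArtifacts, exercised above, copies a dataset's underlying files
    # out of the datastore. A hedged sketch of the two layout modes that the
    # assertions above check (destination and file names are illustrative):
    #
    #   butler.retrieveArtifacts([ref], dest, preserve_path=True)
    #     -> dest/<run>/<datasetType>/.../file.ext  (datastore paths kept)
    #   butler.retrieveArtifacts([ref], dest, preserve_path=False)
    #     -> dest/file.ext                          (flat, no path separators)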

    def testDeferredCollectionPassing(self) -> None:
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a
        # CollectionError.
        self.assertFalse(butler.exists(datasetType, dataId))
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original run collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True
    validationCanFail: bool
    fullConfigKey: str | None
    registryStr: str | None
    datastoreName: list[str] | None
    datastoreStr: list[str]

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self) -> None:
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler._datastore, butler2._datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"file://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"):
                        Butler("bad_label")
                    with self.assertRaises(FileNotFoundError):
                        # Should ignore aliases.
                        Butler(ResourcePath("label", forceAbsolute=False))
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(
                        Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False)
                    )
                    self.assertIn("not known to", str(cm.exception))
                    # Should report no failure.
                    self.assertEqual(ButlerRepoIndex.get_failure_reason(), "")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with empty configuration.
                butler_index = Config()
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"):
                        Butler("label")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with bad contents.
                with open(temp_file.ospath, "w") as fh:
                    print("'", file=fh)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"):
                        Butler("label")
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())

            with self.assertRaisesRegex(FileNotFoundError, "index file not found"):
                Butler("label")

        # Check that we can create Butler when the alias file is not found.
        butler = Butler(self.tmpConfigFile, writeable=False)
        self.assertIsInstance(butler, Butler)
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())
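
    # For reference, the index consumed via DAF_BUTLER_REPOSITORY_INDEX above
    # is a flat label-to-URI mapping. A hedged sketch of its YAML form (the
    # paths are illustrative):
    #
    #   label: /tmp/some_repo/butler.yaml
    #   bad_label: file://bucket/not_real.yaml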

    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler._datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler._datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the wrong dataset type definition, this time
        # passing the DatasetType itself rather than a resolved ref. This
        # should be consistent with the ref-based get() behavior and return
        # the python type of the supplied DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            # For first ingest use copy.
            butler.ingest(*datasets, transfer="copy", record_validation_info=False)

            # Now try to ingest again in "execution butler" mode where
            # the registry entries exist but the datastore does not have
            # the files. We also need to strip the dimension records to ensure
            # that they will be re-added by the ingest.
            ref = datasets[0].refs[0]
            datasets[0].refs = [
                cast(
                    DatasetRef,
                    butler.registry.findDataset(ref.datasetType, dataId=ref.dataId, collections=ref.run),
                )
                for ref in datasets[0].refs
            ]
            all_refs = []
            for dataset in datasets:
                refs = []
                for ref in dataset.refs:
                    # Create a dict from the dataId to drop the records.
                    new_data_id = {str(k): v for k, v in ref.dataId.items()}
                    new_ref = butler.registry.findDataset(ref.datasetType, new_data_id, collections=ref.run)
                    assert new_ref is not None
                    self.assertFalse(new_ref.dataId.hasRecords())
                    refs.append(new_ref)
                dataset.refs = refs
                all_refs.extend(dataset.refs)
            butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False)

            # Use move mode to test that the file is deleted. Also
            # disable recording of file size.
            butler.ingest(*datasets, transfer="move", record_validation_info=False)

            # Check that every ref now has records.
            for dataset in datasets:
                for ref in dataset.refs:
                    self.assertTrue(ref.dataId.hasRecords())

            # Ensure that the file has disappeared.
            self.assertFalse(tempFile.exists())

            # Check that the datastore recorded no file size.
            # Not all datastores can support this.
            try:
                infos = butler._datastore.getStoredItemsInfo(datasets[0].refs[0])  # type: ignore[attr-defined]
                self.assertEqual(infos[0].file_size, -1)
            except AttributeError:
                pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second. This line will
        # issue a warning log message for a ChainedDatastore that uses an
        # InMemoryDatastore since in-memory can not ingest files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.exists(datasetTypeName, dataId1))
        self.assertTrue(butler.exists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets
        datasets = []
        butler.ingest(*datasets)

    def testPickle(self) -> None:
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying can still return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get via the resolved DatasetRef (formerly getDirect)
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)

    def testMakeRepo(self) -> None:
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler._datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self) -> None:
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool:
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)
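
    # FileTemplate behavior exercised above, in brief: "{visit.name}" pulls
    # dimension-record metadata into the path, and the ":?" suffix marks a
    # field as optional, so an unknown field like "{visit.namex:?}" is dropped
    # with an INFO log instead of raising KeyError. Illustrative template:
    #
    #   FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")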

    def testImportExport(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass: StorageClass) -> None:
        """Test exporting and importing.

        This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")

        # Test that we must have a file extension.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump", directory=".") as export:
                pass

        # Test that an unknown format is not allowed.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump.fits", directory=".") as export:
                pass

        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile) as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.exists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.dimensions["skymap"].RecordClass(**skymapRecord)],
                )
1384 def testRemoveRuns(self) -> None:
1385 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1386 butler = Butler(self.tmpConfigFile, writeable=True)
1387 # Load registry data with dimensions to hang datasets off of.
1388 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
1389 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
1390 # Add some RUN-type collection.
1391 run1 = "run1"
1392 butler.registry.registerRun(run1)
1393 run2 = "run2"
1394 butler.registry.registerRun(run2)
1395 # put a dataset in each
1396 metric = makeExampleMetrics()
1397 dimensions = butler.dimensions.extract(["instrument", "physical_filter"])
1398 datasetType = self.addDatasetType(
1399 "prune_collections_test_dataset", dimensions, storageClass, butler.registry
1400 )
1401 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
1402 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
1403 uri1 = butler.getURI(ref1)
1404 uri2 = butler.getURI(ref2)
1406 with self.assertRaises(OrphanedRecordError):
1407 butler.registry.removeDatasetType(datasetType.name)
1409 # Remove from both runs with different values for unstore.
1410 butler.removeRuns([run1], unstore=True)
1411 butler.removeRuns([run2], unstore=False)
1412 # There should be nothing in the registry for either run, and the
1413 # datastore should not think either dataset exists.
1414 with self.assertRaises(MissingCollectionError):
1415 butler.registry.getCollectionType(run1)
1416 with self.assertRaises(MissingCollectionError):
1417 butler.registry.getCollectionType(run2)
1418 self.assertFalse(butler.stored(ref1))
1419 self.assertFalse(butler.stored(ref2))
1420 # The file for the ref we unstored should be gone, but the file for
1421 # the ref we merely forgot should still be around.
1422 self.assertFalse(uri1.exists())
1423 self.assertTrue(uri2.exists())
1425 # Now that the collections have been pruned, we can remove the
1426 # dataset type.
1427 butler.registry.removeDatasetType(datasetType.name)
1429 with self.assertLogs("lsst.daf.butler.registries", "INFO") as cm:
1430 butler.registry.removeDatasetType(("test*", "test*"))
1431 self.assertIn("not defined", "\n".join(cm.output))
1434class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1435 """PosixDatastore specialization of a butler"""
1437 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1438 fullConfigKey: str | None = ".datastore.formatters"
1439 validationCanFail = True
1440 datastoreStr = ["/tmp"]
1441 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1442 registryStr = "/gen3.sqlite3"
1444 def testPathConstructor(self) -> None:
1445 """Independent test of constructor using PathLike."""
1446 butler = Butler(self.tmpConfigFile, run=self.default_run)
1447 self.assertIsInstance(butler, Butler)
1449 # And again with a Path object with the butler yaml
1450 path = pathlib.Path(self.tmpConfigFile)
1451 butler = Butler(path, writeable=False)
1452 self.assertIsInstance(butler, Butler)
1454 # And again with a Path object without the butler yaml
1455 # (making sure we skip it if the tmp config doesn't end
1456 # in butler.yaml -- which is the case for a subclass)
1457 if self.tmpConfigFile.endswith("butler.yaml"):
1458 path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
1459 butler = Butler(path, writeable=False)
1460 self.assertIsInstance(butler, Butler)
1462 def testExportTransferCopy(self) -> None:
1463 """Test local export using all transfer modes"""
1464 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1465 exportButler = self.runPutGetTest(storageClass, "test_metric")
1466 # Test that the repo actually has at least one dataset.
1467 datasets = list(exportButler.registry.queryDatasets(..., collections=...))
1468 self.assertGreater(len(datasets), 0)
1469 uris = [exportButler.getURI(d) for d in datasets]
1470 assert isinstance(exportButler._datastore, FileDatastore)
1471 datastoreRoot = exportButler.get_datastore_roots()[exportButler.get_datastore_names()[0]]
1473 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]
1475 for path in pathsInStore:
1476 # Assume local file system
1477 assert path is not None
1478 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}")
1480 for transfer in ("copy", "link", "symlink", "relsymlink"):
1481 with safeTestTempDir(TESTDIR) as exportDir:
1482 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export:
1483 export.saveDatasets(datasets)
1484 for path in pathsInStore:
1485 assert path is not None
1486 self.assertTrue(
1487 self.checkFileExists(exportDir, path),
1488 f"Check that mode {transfer} exported files",
1489 )
1491 def testPruneDatasets(self) -> None:
1492 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1493 butler = Butler(self.tmpConfigFile, writeable=True)
1494 assert isinstance(butler._datastore, FileDatastore)
1495 # Load registry data with dimensions to hang datasets off of.
1496 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry"))
1497 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
1498 # Add some RUN-type collections.
1499 run1 = "run1"
1500 butler.registry.registerRun(run1)
1501 run2 = "run2"
1502 butler.registry.registerRun(run2)
1503 # Put some datasets. ref1 and ref2 have the same data ID and are in
1504 # different runs; ref3 has a different data ID.
1505 metric = makeExampleMetrics()
1506 dimensions = butler.dimensions.extract(["instrument", "physical_filter"])
1507 datasetType = self.addDatasetType(
1508 "prune_collections_test_dataset", dimensions, storageClass, butler.registry
1509 )
1510 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
1511 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
1512 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
1514 many_stored = butler.stored_many([ref1, ref2, ref3])
1515 for ref, stored in many_stored.items():
1516 self.assertTrue(stored, f"Ref {ref} should be stored")
1518 many_exists = butler._exists_many([ref1, ref2, ref3])
1519 for ref, exists in many_exists.items():
1520 self.assertTrue(exists, f"Checking ref {ref} exists.")
1521 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored")
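# VERIFIED is the strongest existence state: the registry record, the
# datastore record, and the file artifact itself were all confirmed.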
1523 # Simple prune.
1524 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
1525 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1))
1527 many_stored = butler.stored_many([ref1, ref2, ref3])
1528 for ref, stored in many_stored.items():
1529 self.assertFalse(stored, f"Ref {ref} should not be stored")
1531 many_exists = butler._exists_many([ref1, ref2, ref3])
1532 for ref, exists in many_exists.items():
1533 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored")
1535 # Put data back.
1536 ref1_new = butler.put(metric, ref1)
1537 self.assertEqual(ref1_new, ref1) # Reuses original ID.
1538 ref2 = butler.put(metric, ref2)
1540 many_stored = butler.stored_many([ref1, ref2, ref3])
1541 self.assertTrue(many_stored[ref1])
1542 self.assertTrue(many_stored[ref2])
1543 self.assertFalse(many_stored[ref3])
1545 ref3 = butler.put(metric, ref3)
1547 many_exists = butler._exists_many([ref1, ref2, ref3])
1548 for ref, exists in many_exists.items():
1549 self.assertTrue(exists, f"Ref {ref} should be stored")
1551 # Clear out the datasets from registry and start again.
1552 refs = [ref1, ref2, ref3]
1553 butler.pruneDatasets(refs, purge=True, unstore=True)
1554 for ref in refs:
1555 butler.put(metric, ref)
1557 # Test different forms of file availability.
1558 # Need to be in a state where:
1559 # - one ref just has registry record.
1560 # - one ref has a missing file but a datastore record.
1561 # - one ref has a missing datastore record but file is there.
1562 # - one ref does not exist anywhere.
1563 # Do not need to test a ref that has everything since that is tested
1564 # above.
1565 ref0 = DatasetRef(
1566 datasetType,
1567 DataCoordinate.standardize(
1568 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions
1569 ),
1570 run=run1,
1571 )
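# Note that ref0 is constructed directly and never put, so neither
# the registry nor the datastore knows anything about it.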
1573 # Delete from datastore and retain in Registry.
1574 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False)
1576 # File has been removed.
1577 uri2 = butler.getURI(ref2)
1578 uri2.remove()
1580 # Datastore has lost track.
1581 butler._datastore.forget([ref3])
1583 # First test with a standard butler.
1584 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
1585 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1586 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED)
1587 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
1588 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED)
1590 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False)
1591 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1592 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED)
1593 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN)
1594 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED)
1595 self.assertTrue(exists_many[ref2])
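# With full_check=False the artifact itself is not examined: the
# _ASSUMED flag marks states inferred from records alone, which is
# why ref2 still evaluates as truthy even though its file is gone.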
1597 # Check that per-ref query gives the same answer as many query.
1598 for ref, exists in exists_many.items():
1599 self.assertEqual(butler.exists(ref, full_check=False), exists)
1601 # Test again with a trusting butler.
1602 butler._datastore.trustGetRequest = True
1603 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
1604 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1605 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED)
1606 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
1607 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT)
1609 # Check that per-ref query gives the same answer as many query.
1610 for ref, exists in exists_many.items():
1611 self.assertEqual(butler.exists(ref, full_check=True), exists)
1613 # Create a ref that reuses the UUID of an existing ref but is
1614 # otherwise different.
1615 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id)
1616 with self.assertRaises(ValueError):
1617 butler.exists(ref_bad)
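# The existence check validates the whole ref, so a reused UUID
# attached to a different data ID is rejected rather than being
# silently matched by ID alone.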
1619 # Create a ref that has a compatible storage class.
1620 ref_compat = ref2.overrideStorageClass("StructuredDataDict")
1621 exists = butler.exists(ref_compat)
1622 self.assertEqual(exists, exists_many[ref2])
1624 # Remove everything and start from scratch.
1625 butler._datastore.trustGetRequest = False
1626 butler.pruneDatasets(refs, purge=True, unstore=True)
1627 for ref in refs:
1628 butler.put(metric, ref)
1630 # These tests mess directly with the trash table and can leave the
1631 # datastore in an odd state. Do them at the end.
1632 # Check that in normal (non-trust) mode, deleting the record
1633 # means that emptying the trash will not touch the file.
1634 uri1 = butler.getURI(ref1)
1635 butler._datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table
1636 butler._datastore.forget([ref1])
1637 butler._datastore.trash(ref1)
1638 butler._datastore.emptyTrash()
1639 self.assertTrue(uri1.exists())
1640 uri1.remove() # Clean it up.
1642 # Simulate execution butler setup by deleting the datastore
1643 # record but keeping the file around, with trust mode enabled.
1644 butler._datastore.trustGetRequest = True
1645 uris = butler.get_many_uris([ref2, ref3])
1646 uri2 = uris[ref2].primaryURI
1647 uri3 = uris[ref3].primaryURI
1648 self.assertTrue(uri2.exists())
1649 self.assertTrue(uri3.exists())
1651 # Remove the datastore record.
1652 butler._datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table
1653 butler._datastore.forget([ref2])
1654 self.assertTrue(uri2.exists())
1655 butler._datastore.trash([ref2, ref3])
1656 # With no datastore record, the ref2 file is removed immediately.
1657 self.assertFalse(uri2.exists())
1658 # But ref3 has to wait for the trash to be emptied.
1659 self.assertTrue(uri3.exists())
1660 butler._datastore.emptyTrash()
1661 self.assertFalse(uri3.exists())
1663 # Clear out the datasets from registry.
1664 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
1666 def testPytypeCoercion(self) -> None:
1667 """Test python type coercion on Butler.get and put."""
1668 # Store some data with the normal example storage class.
1669 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1670 datasetTypeName = "test_metric"
1671 butler = self.runPutGetTest(storageClass, datasetTypeName)
1673 dataId = {"instrument": "DummyCamComp", "visit": 423}
1674 metric = butler.get(datasetTypeName, dataId=dataId)
1675 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample")
1677 datasetType_ori = butler.registry.getDatasetType(datasetTypeName)
1678 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents")
1680 # Now need to hack the registry dataset type definition.
1681 # There is no API for this.
1682 assert isinstance(butler._registry, SqlRegistry)
1683 manager = butler._registry._managers.datasets
1684 assert hasattr(manager, "_db") and hasattr(manager, "_static")
1685 manager._db.update(
1686 manager._static.dataset_type,
1687 {"name": datasetTypeName},
1688 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"},
1689 )
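# The row dict is keyed by the dataset type name because the "where"
# argument of Database.update maps each column name to the row-dict
# key that holds the value to match against.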
1691 # Force reset of dataset type cache
1692 butler.registry.refresh()
1694 datasetType_new = butler.registry.getDatasetType(datasetTypeName)
1695 self.assertEqual(datasetType_new.name, datasetType_ori.name)
1696 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel")
1698 metric_model = butler.get(datasetTypeName, dataId=dataId)
1699 self.assertNotEqual(type(metric_model), type(metric))
1700 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel")
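# The stored file was written as a MetricsExample, but the registry
# now claims the Model storage class, so get() coerced the python
# type on read.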
1702 # Put the model and read it back to show that everything now
1703 # works as normal.
1704 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424)
1705 metric_model_new = butler.get(metric_ref)
1706 self.assertEqual(metric_model_new, metric_model)
1708 # Hack the storage class again to something that will fail on
1709 # get because no conversion to the python type is possible.
1710 manager._db.update(
1711 manager._static.dataset_type,
1712 {"name": datasetTypeName},
1713 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"},
1714 )
1715 butler.registry.refresh()
1717 with self.assertRaises(ValueError):
1718 butler.get(datasetTypeName, dataId=dataId)
1721@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
1722class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1723 """PosixDatastore specialization of a butler using Postgres"""
1725 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1726 fullConfigKey = ".datastore.formatters"
1727 validationCanFail = True
1728 datastoreStr = ["/tmp"]
1729 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1730 registryStr = "PostgreSQL@test"
1731 postgresql: Any
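# The on_initialized handler below enables the btree_gist extension,
# which the registry needs for its timespan exclusion constraints.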
1733 @staticmethod
1734 def _handler(postgresql: Any) -> None:
1735 engine = sqlalchemy.engine.create_engine(postgresql.url())
1736 with engine.begin() as connection:
1737 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))
1739 @classmethod
1740 def setUpClass(cls) -> None:
1741 # Create the postgres test server.
1742 cls.postgresql = testing.postgresql.PostgresqlFactory(
1743 cache_initialized_db=True, on_initialized=cls._handler
1744 )
1745 super().setUpClass()
1747 @classmethod
1748 def tearDownClass(cls) -> None:
1749 # Clean up any lingering SQLAlchemy engines/connections
1750 # so they're closed before we shut down the server.
1751 gc.collect()
1752 cls.postgresql.clear_cache()
1753 super().tearDownClass()
1755 def setUp(self) -> None:
1756 self.server = self.postgresql()
1758 # Need to add a registry section to the config.
1759 self._temp_config = False
1760 config = Config(self.configFile)
1761 config["registry", "db"] = self.server.url()
1762 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh:
1763 config.dump(fh)
1764 self.configFile = fh.name
1765 self._temp_config = True
1766 super().setUp()
1768 def tearDown(self) -> None:
1769 self.server.stop()
1770 if self._temp_config and os.path.exists(self.configFile):
1771 os.remove(self.configFile)
1772 super().tearDown()
1774 def testMakeRepo(self) -> None:
1775 # The base class test assumes that it is using SQLite and that
1776 # the config file is acceptable to SQLite.
1777 raise unittest.SkipTest("Postgres config is not compatible with this test.")
1780class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
1781 """InMemoryDatastore specialization of a butler"""
1783 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
1784 fullConfigKey = None
1785 useTempRoot = False
1786 validationCanFail = False
1787 datastoreStr = ["datastore='InMemory"]
1788 datastoreName = ["InMemoryDatastore@"]
1789 registryStr = "/gen3.sqlite3"
1791 def testIngest(self) -> None:
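# Ingest of external files is not supported by the in-memory
# datastore, so the base class test is disabled here.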
1792 pass
1795class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1796 """PosixDatastore specialization"""
1798 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
1799 fullConfigKey = ".datastore.datastores.1.formatters"
1800 validationCanFail = True
1801 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
1802 datastoreName = [
1803 "InMemoryDatastore@",
1804 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
1805 "SecondDatastore",
1806 ]
1807 registryStr = "/gen3.sqlite3"
1810class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
1811 """Test that a yaml file in one location can refer to a root in another."""
1813 datastoreStr = ["dir1"]
1814 # Disable the makeRepo test since we are deliberately not using
1815 # butler.yaml as the config name.
1816 fullConfigKey = None
1818 def setUp(self) -> None:
1819 self.root = makeTestTempDir(TESTDIR)
1821 # Make a new repository in one place
1822 self.dir1 = os.path.join(self.root, "dir1")
1823 Butler.makeRepo(self.dir1, config=Config(self.configFile))
1825 # Move the yaml file to a different place and add a "root"
1826 self.dir2 = os.path.join(self.root, "dir2")
1827 os.makedirs(self.dir2, exist_ok=True)
1828 configFile1 = os.path.join(self.dir1, "butler.yaml")
1829 config = Config(configFile1)
1830 config["root"] = self.dir1
1831 configFile2 = os.path.join(self.dir2, "butler2.yaml")
1832 config.dumpToUri(configFile2)
1833 os.remove(configFile1)
1834 self.tmpConfigFile = configFile2
1836 def testFileLocations(self) -> None:
1837 self.assertNotEqual(self.dir1, self.dir2)
1838 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
1839 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
1840 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
1843class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
1844 """Test that a config file created by makeRepo outside of repo works."""
1846 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1848 def setUp(self) -> None:
1849 self.root = makeTestTempDir(TESTDIR)
1850 self.root2 = makeTestTempDir(TESTDIR)
1852 self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
1853 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1855 def tearDown(self) -> None:
1856 if os.path.exists(self.root2):
1857 shutil.rmtree(self.root2, ignore_errors=True)
1858 super().tearDown()
1860 def testConfigExistence(self) -> None:
1861 c = Config(self.tmpConfigFile)
1862 uri_config = ResourcePath(c["root"])
1863 uri_expected = ResourcePath(self.root, forceDirectory=True)
1864 self.assertEqual(uri_config.geturl(), uri_expected.geturl())
1865 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")
1867 def testPutGet(self) -> None:
1868 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1869 self.runPutGetTest(storageClass, "test_metric")
1872class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
1873 """Test that a config file created by makeRepo outside of repo works."""
1875 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1877 def setUp(self) -> None:
1878 self.root = makeTestTempDir(TESTDIR)
1879 self.root2 = makeTestTempDir(TESTDIR)
1881 self.tmpConfigFile = self.root2
1882 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1884 def testConfigExistence(self) -> None:
1885 # Append the yaml file name, otherwise the Config constructor does
1886 # not know the file type.
1887 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
1888 super().testConfigExistence()
1891class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
1892 """Test that a config file created by makeRepo outside of repo works."""
1894 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1896 def setUp(self) -> None:
1897 self.root = makeTestTempDir(TESTDIR)
1898 self.root2 = makeTestTempDir(TESTDIR)
1900 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl()
1901 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1904@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
1905class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1906 """S3Datastore specialization of a butler; an S3 storage Datastore +
1907 a local in-memory SqlRegistry.
1908 """
1910 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
1911 fullConfigKey = None
1912 validationCanFail = True
1914 bucketName = "anybucketname"
1915 """Name of the Bucket that will be used in the tests. The name is read from
1916 the config file used with the tests during set-up.
1917 """
1919 root = "butlerRoot/"
1920 """Root repository directory expected to be used in case useTempRoot=False.
1921 Otherwise the root is set to a 20 characters long randomly generated string
1922 during set-up.
1923 """
1925 datastoreStr = [f"datastore={root}"]
1926 """Contains all expected root locations in a format expected to be
1927 returned by Butler stringification.
1928 """
1930 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
1931 """The expected format of the S3 Datastore string."""
1933 registryStr = "/gen3.sqlite3"
1934 """Expected format of the Registry string."""
1936 mock_s3 = mock_s3()
1937 """The mocked s3 interface from moto."""
1939 def genRoot(self) -> str:
1940 """Return a random string of len 20 to serve as a root
1941 name for the temporary bucket repo.
1943 This is equivalent to tempfile.mkdtemp as this is what self.root
1944 becomes when useTempRoot is True.
1945 """
1946 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20))
1947 return rndstr + "/"
1949 def setUp(self) -> None:
1950 config = Config(self.configFile)
1951 uri = ResourcePath(config[".datastore.datastore.root"])
1952 self.bucketName = uri.netloc
1954 # Enable S3 mocking of tests.
1955 self.mock_s3.start()
1957 # Set up some fake credentials if they do not already exist.
1958 self.usingDummyCredentials = setAwsEnvCredentials()
1960 if self.useTempRoot:
1961 self.root = self.genRoot()
1962 rooturi = f"s3://{self.bucketName}/{self.root}"
1963 config.update({"datastore": {"datastore": {"root": rooturi}}})
1965 # Need a local folder to store the registry database.
1966 self.reg_dir = makeTestTempDir(TESTDIR)
1967 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
1969 # Moto needs to know that we expect the bucket to exist
1970 # (this used to be the class attribute bucketName).
1971 s3 = boto3.resource("s3")
1972 s3.create_bucket(Bucket=self.bucketName)
1974 self.datastoreStr = [f"datastore='{rooturi}'"]
1975 self.datastoreName = [f"FileDatastore@{rooturi}"]
1976 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
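# posixpath is used instead of os.path because S3 URIs always use
# forward slashes, regardless of the host platform.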
1977 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")
1979 def tearDown(self) -> None:
1980 s3 = boto3.resource("s3")
1981 bucket = s3.Bucket(self.bucketName)
1982 try:
1983 bucket.objects.all().delete()
1984 except botocore.exceptions.ClientError as e:
1985 if e.response["Error"]["Code"] == "404":
1986 # The key was not reachable; nothing to clean up.
1987 pass
1988 else:
1989 raise
1991 bucket = s3.Bucket(self.bucketName)
1992 bucket.delete()
1994 # Stop the S3 mock.
1995 self.mock_s3.stop()
1997 # Unset any dummy credentials that may have been set.
1998 if self.usingDummyCredentials:
1999 unsetAwsEnvCredentials()
2001 if self.reg_dir is not None and os.path.exists(self.reg_dir):
2002 shutil.rmtree(self.reg_dir, ignore_errors=True)
2004 if self.useTempRoot and os.path.exists(self.root):
2005 shutil.rmtree(self.root, ignore_errors=True)
2007 super().tearDown()
2010class PosixDatastoreTransfers(unittest.TestCase):
2011 """Test data transfers between butlers.
2013 Tests cover different dataset ID managers; only UUID-to-UUID transfer
2014 is exercised here. UUID to integer is not supported since we do not
2015 currently want to allow that. Integer to UUID is supported, with the
2016 caveat that a UUID4 will be generated, which is incorrect for raw
2017 dataset types; the test ignores that.
2018 """
2020 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
2021 storageClassFactory: StorageClassFactory
2023 @classmethod
2024 def setUpClass(cls) -> None:
2025 cls.storageClassFactory = StorageClassFactory()
2026 cls.storageClassFactory.addFromConfig(cls.configFile)
2028 def setUp(self) -> None:
2029 self.root = makeTestTempDir(TESTDIR)
2030 self.config = Config(self.configFile)
2032 def tearDown(self) -> None:
2033 removeTestTempDir(self.root)
2035 def create_butler(self, manager: str, label: str) -> Butler:
2036 config = Config(self.configFile)
2037 config["registry", "managers", "datasets"] = manager
2038 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True)
2040 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None:
2041 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
2042 if manager1 is None:
2043 manager1 = default
2044 if manager2 is None:
2045 manager2 = default
2046 self.source_butler = self.create_butler(manager1, "1")
2047 self.target_butler = self.create_butler(manager2, "2")
2049 def testTransferUuidToUuid(self) -> None:
2050 self.create_butlers()
2051 self.assertButlerTransfers()
2053 def _enable_trust(self, datastore: Datastore) -> None:
2054 if hasattr(datastore, "trustGetRequest"):
2055 datastore.trustGetRequest = True
2056 elif hasattr(datastore, "datastores"):
2057 for datastore in datastore.datastores:
2058 if hasattr(datastore, "trustGetRequest"):
2059 datastore.trustGetRequest = True
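# A ChainedDatastore has no trustGetRequest attribute of its own, so
# the flag is enabled on each child datastore that supports it.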
2061 def testTransferMissing(self) -> None:
2062 """Test transfers where datastore records are missing.
2064 This is how execution butler works.
2065 """
2066 self.create_butlers()
2068 # Configure the source butler to allow trust.
2069 self._enable_trust(self.source_butler._datastore)
2071 self.assertButlerTransfers(purge=True)
2073 def testTransferMissingDisassembly(self) -> None:
2074 """Test transfers where datastore records are missing.
2076 This is how execution butler works.
2077 """
2078 self.create_butlers()
2080 # Configure the source butler to allow trust.
2081 self._enable_trust(self.source_butler._datastore)
2083 # Test disassembly.
2084 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite")
2086 def testAbsoluteURITransferDirect(self) -> None:
2087 """Test transfer using an absolute URI."""
2088 self._absolute_transfer("auto")
2090 def testAbsoluteURITransferCopy(self) -> None:
2091 """Test transfer using an absolute URI."""
2092 self._absolute_transfer("copy")
2094 def _absolute_transfer(self, transfer: str) -> None:
2095 self.create_butlers()
2097 storageClassName = "StructuredData"
2098 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
2099 datasetTypeName = "random_data"
2100 run = "run1"
2101 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
2103 dimensions = self.source_butler.dimensions.extract(())
2104 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2105 self.source_butler.registry.registerDatasetType(datasetType)
2107 metrics = makeExampleMetrics()
2108 with ResourcePath.temporary_uri(suffix=".json") as temp:
2109 dataId = DataCoordinate.makeEmpty(self.source_butler.dimensions)
2110 source_refs = [DatasetRef(datasetType, dataId, run=run)]
2111 temp.write(json.dumps(metrics.exportAsDict()).encode())
2112 dataset = FileDataset(path=temp, refs=source_refs)
2113 self.source_butler.ingest(dataset, transfer="direct")
2115 self.target_butler.transfer_from(
2116 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer
2117 )
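# With transfer="auto" a direct-ingested (absolute URI) dataset is
# expected to keep its original location in the target butler, while
# "copy" should bring the file into the target datastore.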
2119 uri = self.target_butler.getURI(dataset.refs[0])
2120 if transfer == "auto":
2121 self.assertEqual(uri, temp)
2122 else:
2123 self.assertNotEqual(uri, temp)
2125 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None:
2126 """Test that a run can be transferred to another butler."""
2127 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
2128 datasetTypeName = "random_data"
2130 # The test will create 3 collections and we will want to transfer
2131 # datasets from two of those three.
2132 runs = ["run1", "run2", "other"]
2134 # Also want to use two different dataset types to ensure that
2135 # grouping works.
2136 datasetTypeNames = ["random_data", "random_data_2"]
2138 # Create the run collections in the source butler.
2139 for run in runs:
2140 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
2142 # Create dimensions in source butler.
2143 n_exposures = 30
2144 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
2145 self.source_butler.registry.insertDimensionData(
2146 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
2147 )
2148 self.source_butler.registry.insertDimensionData(
2149 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
2150 )
2152 for i in range(n_exposures):
2153 self.source_butler.registry.insertDimensionData(
2154 "exposure",
2155 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"},
2156 )
2158 # Create dataset types in the source butler.
2159 dimensions = self.source_butler.dimensions.extract(["instrument", "exposure"])
2160 for datasetTypeName in datasetTypeNames:
2161 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2162 self.source_butler.registry.registerDatasetType(datasetType)
2164 # Write a dataset to an unrelated run -- this will ensure that
2165 # we are rewriting integer dataset IDs in the target if necessary.
2166 # This is not relevant for UUIDs.
2167 run = "distraction"
2168 butler = Butler(butler=self.source_butler, run=run)
2169 butler.put(
2170 makeExampleMetrics(),
2171 datasetTypeName,
2172 exposure=1,
2173 instrument="DummyCamComp",
2174 physical_filter="d-r",
2175 )
2177 # Write some example metrics to the source
2178 butler = Butler(butler=self.source_butler)
2180 # Set of DatasetRefs that should be in the list of refs to transfer
2181 # but which will not be transferred.
2182 deleted: set[DatasetRef] = set()
2184 n_expected = 20 # Number of datasets expected to be transferred
2185 source_refs = []
2186 for i in range(n_exposures):
2187 # Put a third of the datasets into each collection, but only
2188 # retain refs for two thirds of them.
2189 index = i % 3
2190 run = runs[index]
2191 datasetTypeName = datasetTypeNames[i % 2]
2193 metric = MetricsExample(
2194 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)]
2195 )
2196 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"}
2197 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)
2199 # Remove the datastore record using the low-level API.
2200 if purge:
2201 # Remove records for a fraction.
2202 if index == 1:
2203 # For one of these delete the file as well.
2204 # This allows the "missing" code to filter the
2205 # file out.
2206 # Access the individual datastores.
2207 datastores = []
2208 if hasattr(butler._datastore, "datastores"):
2209 datastores.extend(butler._datastore.datastores)
2210 else:
2211 datastores.append(butler._datastore)
2213 if not deleted:
2214 # For a chained datastore we need to remove
2215 # files in each chain.
2216 for datastore in datastores:
2217 # The file might not be known to the datastore
2218 # if constraints are used.
2219 try:
2220 primary, uris = datastore.getURIs(ref)
2221 except FileNotFoundError:
2222 continue
2223 if primary:
2224 if primary.scheme != "mem":
2225 primary.remove()
2226 for uri in uris.values():
2227 if uri.scheme != "mem":
2228 uri.remove()
2229 n_expected -= 1
2230 deleted.add(ref)
2232 # Remove the datastore record.
2233 for datastore in datastores:
2234 if hasattr(datastore, "removeStoredItemInfo"):
2235 datastore.removeStoredItemInfo(ref)
2237 if index < 2:
2238 source_refs.append(ref)
2239 if ref not in deleted:
2240 new_metric = butler.get(ref)
2241 self.assertEqual(new_metric, metric)
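# At this point source_refs holds the run1 and run2 datasets (two
# thirds of the puts); datasets in the "other" run stay behind, and
# when purging, some run2 refs have lost their datastore records
# (and one of them its file as well).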
2243 # Create some bad dataset types to ensure we check for inconsistent
2244 # definitions.
2245 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList")
2246 for datasetTypeName in datasetTypeNames:
2247 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass)
2248 self.target_butler.registry.registerDatasetType(datasetType)
2249 with self.assertRaises(ConflictingDefinitionError) as cm:
2250 self.target_butler.transfer_from(self.source_butler, source_refs)
2251 self.assertIn("dataset type differs", str(cm.exception))
2253 # And remove the bad definitions.
2254 for datasetTypeName in datasetTypeNames:
2255 self.target_butler.registry.removeDatasetType(datasetTypeName)
2257 # Transfer without creating dataset types should fail.
2258 with self.assertRaises(KeyError):
2259 self.target_butler.transfer_from(self.source_butler, source_refs)
2261 # Transfer without creating dimensions should fail.
2262 with self.assertRaises(ConflictingDefinitionError) as cm:
2263 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True)
2264 self.assertIn("dimension", str(cm.exception))
2266 # The failed transfer above leaves the registry in an inconsistent
2267 # state because the run is created but then rolled back without
2268 # the collection cache being cleared. For now force a refresh.
2269 # This can be removed with DM-35498.
2270 self.target_butler.registry.refresh()
2272 # Now transfer them to the second butler, including dimensions.
2273 with self.assertLogs(level=logging.DEBUG) as log_cm:
2274 transferred = self.target_butler.transfer_from(
2275 self.source_butler,
2276 source_refs,
2277 register_dataset_types=True,
2278 transfer_dimensions=True,
2279 )
2280 self.assertEqual(len(transferred), n_expected)
2281 log_output = ";".join(log_cm.output)
2283 # A ChainedDatastore will use the in-memory datastore for mexists,
2284 # so we cannot rely on the mexists log message.
2285 self.assertIn("Number of datastore records found in source", log_output)
2286 self.assertIn("Creating output run", log_output)
2288 # Do the transfer twice to ensure that it will do nothing extra.
2289 # Only do this if purge=True because it does not work for integer
2290 # dataset IDs.
2291 if purge:
2292 # This should not need to register dataset types.
2293 transferred = self.target_butler.transfer_from(self.source_butler, source_refs)
2294 self.assertEqual(len(transferred), n_expected)
2296 # Also do an explicit low-level transfer to trigger some
2297 # edge cases.
2298 with self.assertLogs(level=logging.DEBUG) as log_cm:
2299 self.target_butler._datastore.transfer_from(self.source_butler._datastore, source_refs)
2300 log_output = ";".join(log_cm.output)
2301 self.assertIn("no file artifacts exist", log_output)
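# Everything was already transferred above, so the low-level call
# finds no new file artifacts to copy.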
2303 with self.assertRaises((TypeError, AttributeError)):
2304 self.target_butler._datastore.transfer_from(self.source_butler, source_refs) # type: ignore
2306 with self.assertRaises(ValueError):
2307 self.target_butler._datastore.transfer_from(
2308 self.source_butler._datastore, source_refs, transfer="split"
2309 )
2311 # Now try to get the same refs from the new butler.
2312 for ref in source_refs:
2313 if ref not in deleted:
2314 new_metric = self.target_butler.get(ref)
2315 old_metric = self.source_butler.get(ref)
2316 self.assertEqual(new_metric, old_metric)
2318 # Now prune the run2 collection and create a CHAINED collection
2319 # in its place. This should block the transfer.
2320 self.target_butler.removeRuns(["run2"], unstore=True)
2321 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED)
2322 with self.assertRaises(CollectionTypeError):
2323 # Re-importing the run1 datasets can be problematic if they
2324 # use integer IDs, so filter those out.
2325 to_transfer = [ref for ref in source_refs if ref.run == "run2"]
2326 self.target_butler.transfer_from(self.source_butler, to_transfer)
2329class ChainedDatastoreTransfers(PosixDatastoreTransfers):
2330 """Test transfers using a chained datastore."""
2332 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
2335def setup_module(module: types.ModuleType) -> None:
2336 """Set up the module for pytest."""
2337 clean_environment()
2340if __name__ == "__main__":
2341 clean_environment()
2342 unittest.main()