Coverage for tests/test_butler.py: 13% of 1314 statements (coverage.py v7.3.2, created at 2023-12-06 10:53 +0000)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Tests for Butler.
29"""
30from __future__ import annotations
32import gc
33import json
34import logging
35import os
36import pathlib
37import pickle
38import posixpath
39import random
40import shutil
41import string
42import tempfile
43import unittest
44import uuid
45from collections.abc import Mapping
46from typing import TYPE_CHECKING, Any, cast

try:
    import boto3
    import botocore
    from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
    from moto import mock_s3  # type: ignore[import]
except ImportError:
    boto3 = None

    def mock_s3(*args: Any, **kwargs: Any) -> Any:  # type: ignore[no-untyped-def]
        """No-op decorator in case moto's mock_s3 cannot be imported."""
        return None
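
# Editorial note (an assumption, not part of the original file): with this
# fallback, a test class decorated with ``@mock_s3`` evaluates to None when
# moto is unavailable, so unittest never collects it; the ``boto3 = None``
# sentinel above is what S3-dependent tests are presumed to check (e.g. via
# ``unittest.skipIf(not boto3, ...)``) before touching S3.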

try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    ButlerRepoIndex,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetExistence,
    DatasetRef,
    DatasetType,
    FileDataset,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.datastore import NullDatastore
from lsst.daf.butler.datastore.file_templates import FileTemplate, FileTemplateValidationError
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.direct_butler import DirectButler
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.registry.sql_registry import SqlRegistry
from lsst.daf.butler.repo_relocation import BUTLER_ROOT_TAG
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    import types

    from lsst.daf.butler import Datastore, DimensionGroup, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def clean_environment() -> None:
    """Remove external environment variables that affect the tests."""
    for k in (
        "DAF_BUTLER_REPOSITORY_INDEX",
        "S3_ENDPOINT_URL",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_SHARED_CREDENTIALS_FILE",
    ):
        os.environ.pop(k, None)


def makeExampleMetrics() -> MetricsExample:
    """Return an example dataset suitable for tests."""
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig behavior that is not covered by any
    other test cases.
    """

    def testSearchPath(self) -> None:
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")
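
# A sketch of the kind of override fragment the search-path directory above
# is assumed to provide (illustrative layout, not the actual test file): a
# datastore config fragment in config/testConfigs along the lines of
#
#     records:
#       table: override_record
#
# which is why config2[("datastore", "records", "table")] differs from the
# value loaded from the base butler.yaml alone.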


class ButlerPutGetTests(TestCaseMixin):
    """Helper class for running a suite of put/get tests against different
    butler configurations.
    """

    root: str
    default_run = "ingésτ😺"
    storageClassFactory: StorageClassFactory
    configFile: str
    tmpConfigFile: str

    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGroup, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(
        self,
        butler: Butler,
        datasetRef: DatasetRef,
        components: tuple[str, ...],
        reference: Any,
        collections: Any = None,
    ) -> None:
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[DirectButler, DatasetType]:
        butler = Butler.from_config(self.tmpConfigFile, run=run)
        assert isinstance(butler, DirectButler), "Expect DirectButler in configuration"

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run})

        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> DirectButler:
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]
        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                kwargs: dict[str, Any] = {}
                if not isinstance(args[0], DatasetRef):  # type: ignore
                    kwargs["run"] = this_run
                ref = butler.put(metric, *args, **kwargs)
                self.assertIsInstance(ref, DatasetRef)

                # Test get with a resolved DatasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get by dataset type name and data ID
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a DatasetRef again
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                primary_uri, secondary_uris = butler.getURIs(ref)
                n_uris = len(secondary_uris)
                if primary_uri:
                    n_uris += 1

                # Can the artifacts themselves be retrieved?
                if not butler._datastore.isEphemeral:
                    # Create a temporary directory to hold the retrieved
                    # artifacts.
                    with tempfile.TemporaryDirectory(
                        prefix="butler-artifacts-", ignore_cleanup_errors=True
                    ) as artifact_root:
                        root_uri = ResourcePath(artifact_root, forceDirectory=True)

                        for preserve_path in (True, False):
                            destination = root_uri.join(f"{preserve_path}_{counter}/")
                            log = logging.getLogger("lsst.x")
                            log.warning("Using destination %s for args %s", destination, args)
                            # Use copy so that we can test that overwrite
                            # protection works (using "auto" for File URIs
                            # would use hard links and subsequent transfer
                            # would work because it knows they are the same
                            # file).
                            transferred = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, transfer="copy"
                            )
                            self.assertGreater(len(transferred), 0)
                            artifacts = list(ResourcePath.findFileResources([destination]))
                            self.assertEqual(set(transferred), set(artifacts))

                            for artifact in transferred:
                                path_in_destination = artifact.relative_to(destination)
                                self.assertIsNotNone(path_in_destination)
                                assert path_in_destination is not None

                                # When path is not preserved there should not
                                # be any path separators.
                                num_seps = path_in_destination.count("/")
                                if preserve_path:
                                    self.assertGreater(num_seps, 0)
                                else:
                                    self.assertEqual(num_seps, 0)

                            self.assertEqual(
                                len(artifacts),
                                n_uris,
                                "Comparing expected artifacts vs actual:"
                                f" {artifacts} vs {primary_uri} and {secondary_uris}",
                            )

                            if preserve_path:
                                # No need to run these twice
                                with self.assertRaises(ValueError):
                                    butler.retrieveArtifacts([ref], destination, transfer="move")

                                with self.assertRaises(FileExistsError):
                                    butler.retrieveArtifacts([ref], destination)

                                transferred_again = butler.retrieveArtifacts(
                                    [ref], destination, preserve_path=preserve_path, overwrite=True
                                )
                                self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                kwargs = {"collections": this_run}
                if isinstance(args[0], DatasetRef):
                    kwargs = {}  # Prevent warning from being issued.
                self.assertFalse(butler.exists(*args, **kwargs))
                # get() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.get_dataset(ref.id))

            # Do explicit registry removal since we know the run collections
            # are empty.
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Create DatasetRef for put using default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Check that getDeferred fails with standalone ref.
        with self.assertRaises(LookupError):
            butler.getDeferred(refIn)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        assert metric.data is not None  # for mypy
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.find_dataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a DatasetType that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises(FileNotFoundError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add the same ref again, so we can check that duplicate put fails.
        ref = butler.put(metric, datasetType, dataId)

        # Repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with a resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # In the case of a resolved ref this write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present.

        return butler

    def testDeferredCollectionPassing(self) -> None:
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler.from_config(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate a no-op.
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises a
        # CollectionError.
        self.assertFalse(butler.exists(datasetType, dataId))
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True
    validationCanFail: bool
    fullConfigKey: str | None
    registryStr: str | None
    datastoreName: list[str] | None
    datastoreStr: list[str]

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self) -> None:
        """Independent test of constructor."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler.from_config(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler.from_config(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler.from_config(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler.from_config(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler._datastore, butler2._datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"file://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler.from_config(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler.from_config("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler.from_config("not_there", writeable=False)
                    with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"):
                        Butler.from_config("bad_label")
                    with self.assertRaises(FileNotFoundError):
                        # Should ignore aliases.
                        Butler.from_config(ResourcePath("label", forceAbsolute=False))
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(
                        Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False)
                    )
                    self.assertIn("not known to", str(cm.exception))
                    # Should report no failure.
                    self.assertEqual(ButlerRepoIndex.get_failure_reason(), "")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with empty configuration.
                butler_index = Config()
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"):
                        Butler.from_config("label")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with bad contents.
                with open(temp_file.ospath, "w") as fh:
                    print("'", file=fh)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"):
                        Butler.from_config("label")
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())

            with self.assertRaisesRegex(FileNotFoundError, "index file not found"):
                Butler.from_config("label")

        # Check that we can create Butler when the alias file is not found.
        butler = Butler.from_config(self.tmpConfigFile, writeable=False)
        self.assertIsInstance(butler, Butler)
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"):
            # No aliases registered.
            Butler.from_config("not_there")
        self.assertEqual(Butler.get_known_repos(), set())
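
    # For reference, the repository index read from DAF_BUTLER_REPOSITORY_INDEX
    # is a flat mapping of label -> butler config URI. A sketch of the YAML
    # form built in testConstructor above (values illustrative, not real
    # paths):
    #
    #     label: /tmp/some_repo/butler.yaml
    #     bad_label: file://bucket/not_real.yaml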

    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler._datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler._datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.get_dataset_type(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again, this time passing the compatible DatasetType to get()
        # rather than a DatasetRef. This should behave consistently with the
        # DatasetRef case above and return the python type of the supplied
        # DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self) -> None:
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            # For first ingest use copy.
            butler.ingest(*datasets, transfer="copy", record_validation_info=False)

            # Now try to ingest again in "execution butler" mode where
            # the registry entries exist but the datastore does not have
            # the files. We also need to strip the dimension records to ensure
            # that they will be re-added by the ingest.
            ref = datasets[0].refs[0]
            datasets[0].refs = [
                cast(
                    DatasetRef,
                    butler.find_dataset(ref.datasetType, data_id=ref.dataId, collections=ref.run),
                )
                for ref in datasets[0].refs
            ]
            all_refs = []
            for dataset in datasets:
                refs = []
                for ref in dataset.refs:
                    # Create a dict from the dataId to drop the records.
                    new_data_id = dict(ref.dataId.required)
                    new_ref = butler.find_dataset(ref.datasetType, new_data_id, collections=ref.run)
                    assert new_ref is not None
                    self.assertFalse(new_ref.dataId.hasRecords())
                    refs.append(new_ref)
                dataset.refs = refs
                all_refs.extend(dataset.refs)
            butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False)

            # Use move mode to test that the file is deleted. Also
            # disable recording of file size.
            butler.ingest(*datasets, transfer="move", record_validation_info=False)

            # Check that every ref now has records.
            for dataset in datasets:
                for ref in dataset.refs:
                    self.assertTrue(ref.dataId.hasRecords())

            # Ensure that the file has disappeared.
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler._datastore.getStoredItemsInfo(datasets[0].refs[0])  # type: ignore[attr-defined]
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.exists(datasetTypeName, dataId1))
        self.assertTrue(butler.exists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets
        datasets = []
        butler.ingest(*datasets)

    def testPickle(self) -> None:
        """Test pickle support."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        assert isinstance(butler, DirectButler), "Expect DirectButler in configuration"
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self) -> None:
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.dimensions.conform(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying can still return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self) -> None:
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get with a resolved DatasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get by dataset type name and data ID
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.find_dataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)

    def testMakeRepo(self) -> None:
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler.from_config(butlerConfig)
        assert isinstance(butler1, DirectButler), "Expect DirectButler in configuration"
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler.from_config(butlerConfig)
        assert isinstance(butler2, DirectButler), "Expect DirectButler in configuration"
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler.from_config(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self) -> None:
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler._datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self) -> None:
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.dimensions.conform(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool:
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)
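
    # For reference (a sketch of assumed syntax, based on the templates used
    # above): file templates are format strings over data ID keys and
    # dimension record attributes, e.g. "a/{visit.name}/{id}_{physical_filter}.fits".
    # A field marked optional with ":?" (like "{visit.namex:?}" above) is
    # logged and dropped when it cannot be resolved, while the same
    # misspelled field without ":?" raises KeyError, as testPutTemplates
    # exercises at its end.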

    def testImportExport(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass: StorageClass) -> None:
        """Test exporting and importing.

        This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")

        # Test that we must have a file extension.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump", directory=".") as export:
                pass

        # Test that an unknown format is not allowed.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump.fits", directory=".") as export:
                pass

        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile) as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                    )
                importButler = Butler.from_config(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.exists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.dimensions["skymap"].RecordClass(**skymapRecord)],
                )
1406 def testRemoveRuns(self) -> None:
1407 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1408 butler = Butler.from_config(self.tmpConfigFile, writeable=True)
1409 # Load registry data with dimensions to hang datasets off of.
1410 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
1411 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
1412 # Add some RUN-type collection.
1413 run1 = "run1"
1414 butler.registry.registerRun(run1)
1415 run2 = "run2"
1416 butler.registry.registerRun(run2)
1417 # Put a dataset in each run.
1418 metric = makeExampleMetrics()
1419 dimensions = butler.dimensions.conform(["instrument", "physical_filter"])
1420 datasetType = self.addDatasetType(
1421 "prune_collections_test_dataset", dimensions, storageClass, butler.registry
1422 )
1423 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
1424 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
1425 uri1 = butler.getURI(ref1)
1426 uri2 = butler.getURI(ref2)
1428 with self.assertRaises(OrphanedRecordError):
1429 butler.registry.removeDatasetType(datasetType.name)
1431 # Remove from both runs with different values for unstore.
1432 butler.removeRuns([run1], unstore=True)
1433 butler.removeRuns([run2], unstore=False)
1434 # Should be nothing in registry for either one, and datastore should
1435 # not think either exists.
1436 with self.assertRaises(MissingCollectionError):
1437 butler.registry.getCollectionType(run1)
1438 with self.assertRaises(MissingCollectionError):
1439 butler.registry.getCollectionType(run2)
1440 self.assertFalse(butler.stored(ref1))
1441 self.assertFalse(butler.stored(ref2))
1442 # The ref we unstored should be gone according to the URI, but the
1443 # one we forgot should still be around.
1444 self.assertFalse(uri1.exists())
1445 self.assertTrue(uri2.exists())
1447 # Now that the collections have been pruned we can remove the
1448 # dataset type
1449 butler.registry.removeDatasetType(datasetType.name)
1451 with self.assertLogs("lsst.daf.butler.registry", "INFO") as cm:
1452 butler.registry.removeDatasetType(("test*", "test*"))
1453 self.assertIn("not defined", "\n".join(cm.output))
1456class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1457 """PosixDatastore specialization of a butler"""
1459 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1460 fullConfigKey: str | None = ".datastore.formatters"
1461 validationCanFail = True
1462 datastoreStr = ["/tmp"]
1463 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1464 registryStr = "/gen3.sqlite3"
1466 def testPathConstructor(self) -> None:
1467 """Independent test of constructor using PathLike."""
1468 butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
1469 self.assertIsInstance(butler, Butler)
1471 # And again with a Path object with the butler yaml
1472 path = pathlib.Path(self.tmpConfigFile)
1473 butler = Butler.from_config(path, writeable=False)
1474 self.assertIsInstance(butler, Butler)
1476 # And again with a Path object without the butler yaml
1477 # (making sure we skip it if the tmp config doesn't end
1478 # in butler.yaml -- which is the case for a subclass)
1479 if self.tmpConfigFile.endswith("butler.yaml"):
1480 path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
1481 butler = Butler.from_config(path, writeable=False)
1482 self.assertIsInstance(butler, Butler)
1484 def testExportTransferCopy(self) -> None:
1485 """Test local export using all transfer modes"""
1486 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1487 exportButler = self.runPutGetTest(storageClass, "test_metric")
1488 # Test that the repo actually has at least one dataset.
1489 datasets = list(exportButler.registry.queryDatasets(..., collections=...))
1490 self.assertGreater(len(datasets), 0)
1491 uris = [exportButler.getURI(d) for d in datasets]
1492 assert isinstance(exportButler._datastore, FileDatastore)
1493 datastoreRoot = exportButler.get_datastore_roots()[exportButler.get_datastore_names()[0]]
1495 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]
1497 for path in pathsInStore:
1498 # Assume local file system
1499 assert path is not None
1500 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}")
1502 for transfer in ("copy", "link", "symlink", "relsymlink"):
1503 with safeTestTempDir(TESTDIR) as exportDir:
1504 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export:
1505 export.saveDatasets(datasets)
1506 for path in pathsInStore:
1507 assert path is not None
1508 self.assertTrue(
1509 self.checkFileExists(exportDir, path),
1510 f"Check that mode {transfer} exported files",
1511 )
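# ("relsymlink" is the relative-symlink variant of "symlink" in
# lsst.resources; this loop only checks that each mode produces the
# expected files under the export directory.)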
1513 def testPruneDatasets(self) -> None:
1514 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1515 butler = Butler.from_config(self.tmpConfigFile, writeable=True)
1516 assert isinstance(butler._datastore, FileDatastore)
1517 # Load registry data with dimensions to hang datasets off of.
1518 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry"))
1519 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
1520 # Add some RUN-type collections.
1521 run1 = "run1"
1522 butler.registry.registerRun(run1)
1523 run2 = "run2"
1524 butler.registry.registerRun(run2)
1525 # Put some datasets. ref1 and ref2 have the same data ID and are in
1526 # different runs; ref3 has a different data ID.
1527 metric = makeExampleMetrics()
1528 dimensions = butler.dimensions.conform(["instrument", "physical_filter"])
1529 datasetType = self.addDatasetType(
1530 "prune_collections_test_dataset", dimensions, storageClass, butler.registry
1531 )
1532 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
1533 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
1534 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
1536 many_stored = butler.stored_many([ref1, ref2, ref3])
1537 for ref, stored in many_stored.items():
1538 self.assertTrue(stored, f"Ref {ref} should be stored")
1540 many_exists = butler._exists_many([ref1, ref2, ref3])
1541 for ref, exists in many_exists.items():
1542 self.assertTrue(exists, f"Checking ref {ref} exists.")
1543 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored")
1545 # Simple prune.
1546 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
1547 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1))
1549 many_stored = butler.stored_many([ref1, ref2, ref3])
1550 for ref, stored in many_stored.items():
1551 self.assertFalse(stored, f"Ref {ref} should not be stored")
1553 many_exists = butler._exists_many([ref1, ref2, ref3])
1554 for ref, exists in many_exists.items():
1555 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored")
1557 # Put data back.
1558 ref1_new = butler.put(metric, ref1)
1559 self.assertEqual(ref1_new, ref1) # Reuses original ID.
1560 ref2 = butler.put(metric, ref2)
1562 many_stored = butler.stored_many([ref1, ref2, ref3])
1563 self.assertTrue(many_stored[ref1])
1564 self.assertTrue(many_stored[ref2])
1565 self.assertFalse(many_stored[ref3])
1567 ref3 = butler.put(metric, ref3)
1569 many_exists = butler._exists_many([ref1, ref2, ref3])
1570 for ref, exists in many_exists.items():
1571 self.assertTrue(exists, f"Ref {ref} should be stored")
1573 # Clear out the datasets from registry and start again.
1574 refs = [ref1, ref2, ref3]
1575 butler.pruneDatasets(refs, purge=True, unstore=True)
1576 for ref in refs:
1577 butler.put(metric, ref)
1579 # Confirm we can retrieve deferred.
1580 dref1 = butler.getDeferred(ref1) # known and exists
1581 metric1 = dref1.get()
1582 self.assertEqual(metric1, metric)
1584 # Test different forms of file availability.
1585 # Need to be in a state where:
1586 # - one ref just has registry record.
1587 # - one ref has a missing file but a datastore record.
1588 # - one ref has a missing datastore record but file is there.
1589 # - one ref does not exist anywhere.
1590 # Do not need to test a ref that has everything since that is tested
1591 # above.
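# A minimal sketch of the flag algebra these assertions rely on
# (hedged: the composite values are inferred from the checks below,
# not quoted from the DatasetExistence documentation):
#
#     full = DatasetExistence.RECORDED | DatasetExistence.DATASTORE
#     assert full | DatasetExistence._ARTIFACT == DatasetExistence.VERIFIED
#     assert full | DatasetExistence._ASSUMED == DatasetExistence.KNOWN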
1592 ref0 = DatasetRef(
1593 datasetType,
1594 DataCoordinate.standardize(
1595 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions
1596 ),
1597 run=run1,
1598 )
1600 # Delete from datastore and retain in Registry.
1601 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False)
1603 # File has been removed.
1604 uri2 = butler.getURI(ref2)
1605 uri2.remove()
1607 # Datastore has lost track.
1608 butler._datastore.forget([ref3])
1610 # First test with a standard butler.
1611 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
1612 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1613 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED)
1614 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
1615 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED)
1617 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False)
1618 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1619 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED)
1620 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN)
1621 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED)
1622 self.assertTrue(exists_many[ref2])
1624 # Check that per-ref query gives the same answer as many query.
1625 for ref, exists in exists_many.items():
1626 self.assertEqual(butler.exists(ref, full_check=False), exists)
1628 # Get deferred checks for existence before it allows it to be
1629 # retrieved.
1630 with self.assertRaises(LookupError):
1631 butler.getDeferred(ref3) # not known, file exists
1632 dref2 = butler.getDeferred(ref2) # known but file missing
1633 with self.assertRaises(FileNotFoundError):
1634 dref2.get()
1636 # Test again with a trusting butler.
1637 butler._datastore.trustGetRequest = True
1638 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
1639 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1640 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED)
1641 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
1642 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT)
1644 # When trusting we can get a deferred dataset handle that is not
1645 # known but does exist.
1646 dref3 = butler.getDeferred(ref3)
1647 metric3 = dref3.get()
1648 self.assertEqual(metric3, metric)
1650 # Check that per-ref query gives the same answer as many query.
1651 for ref, exists in exists_many.items():
1652 self.assertEqual(butler.exists(ref, full_check=True), exists)
1654 # Create a ref that surprisingly has the UUID of an existing ref
1655 # but is not the same.
1656 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id)
1657 with self.assertRaises(ValueError):
1658 butler.exists(ref_bad)
1660 # Create a ref that has a compatible storage class.
1661 ref_compat = ref2.overrideStorageClass("StructuredDataDict")
1662 exists = butler.exists(ref_compat)
1663 self.assertEqual(exists, exists_many[ref2])
1665 # Remove everything and start from scratch.
1666 butler._datastore.trustGetRequest = False
1667 butler.pruneDatasets(refs, purge=True, unstore=True)
1668 for ref in refs:
1669 butler.put(metric, ref)
1671 # These tests mess directly with the trash table and can leave the
1672 # datastore in an odd state, so do them at the end.
1673 # Check that in normal mode, deleting the record means the trash
1674 # machinery will not touch the file.
1675 uri1 = butler.getURI(ref1)
1676 butler._datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table
1677 butler._datastore.forget([ref1])
1678 butler._datastore.trash(ref1)
1679 butler._datastore.emptyTrash()
1680 self.assertTrue(uri1.exists())
1681 uri1.remove() # Clean it up.
1683 # Simulate execution butler setup by deleting the datastore
1684 # record but keeping the file around and trusting.
1685 butler._datastore.trustGetRequest = True
1686 uris = butler.get_many_uris([ref2, ref3])
1687 uri2 = uris[ref2].primaryURI
1688 uri3 = uris[ref3].primaryURI
1689 self.assertTrue(uri2.exists())
1690 self.assertTrue(uri3.exists())
1692 # Remove the datastore record.
1693 butler._datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table
1694 butler._datastore.forget([ref2])
1695 self.assertTrue(uri2.exists())
1696 butler._datastore.trash([ref2, ref3])
1697 # Immediate removal for ref2 file
1698 self.assertFalse(uri2.exists())
1699 # But ref3 has to wait for the trash to be emptied.
1700 self.assertTrue(uri3.exists())
1701 butler._datastore.emptyTrash()
1702 self.assertFalse(uri3.exists())
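# In summary: a ref the datastore still has a record for is only
# queued by trash() and its file survives until emptyTrash(), while
# in trust mode a ref with no record (ref2 above) loses its file
# immediately.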
1704 # Clear out the datasets from registry.
1705 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
1707 def testPytypeCoercion(self) -> None:
1708 """Test python type coercion on Butler.get and put."""
1709 # Store some data with the normal example storage class.
1710 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1711 datasetTypeName = "test_metric"
1712 butler = self.runPutGetTest(storageClass, datasetTypeName)
1714 dataId = {"instrument": "DummyCamComp", "visit": 423}
1715 metric = butler.get(datasetTypeName, dataId=dataId)
1716 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample")
1718 datasetType_ori = butler.get_dataset_type(datasetTypeName)
1719 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents")
1721 # Now need to hack the registry dataset type definition.
1722 # There is no API for this.
1723 assert isinstance(butler._registry, SqlRegistry)
1724 manager = butler._registry._managers.datasets
1725 assert hasattr(manager, "_db") and hasattr(manager, "_static")
1726 manager._db.update(
1727 manager._static.dataset_type,
1728 {"name": datasetTypeName},
1729 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"},
1730 )
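# (Reading guide for the update() call above, inferred from its usage
# here: the second argument maps a column name to the key in each row
# dict that holds the match value, so {"name": datasetTypeName}
# matches rows whose "name" column equals row[datasetTypeName], i.e.
# the dataset type name itself.)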
1732 # Force reset of dataset type cache
1733 butler.registry.refresh()
1735 datasetType_new = butler.get_dataset_type(datasetTypeName)
1736 self.assertEqual(datasetType_new.name, datasetType_ori.name)
1737 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel")
1739 metric_model = butler.get(datasetTypeName, dataId=dataId)
1740 self.assertNotEqual(type(metric_model), type(metric))
1741 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel")
1743 # Put the model and read it back to show that everything now
1744 # works as normal.
1745 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424)
1746 metric_model_new = butler.get(metric_ref)
1747 self.assertEqual(metric_model_new, metric_model)
1749 # Hack the storage class again to something that will make the
1750 # get fail because no conversion class exists.
1751 manager._db.update(
1752 manager._static.dataset_type,
1753 {"name": datasetTypeName},
1754 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"},
1755 )
1756 butler.registry.refresh()
1758 with self.assertRaises(ValueError):
1759 butler.get(datasetTypeName, dataId=dataId)
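# (Hedged summary of the coercion behaviour exercised above: on get()
# the registry-defined storage class wins and the stored python type
# is converted when a converter is registered; with no conversion
# path, get() raises ValueError.)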
1762@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
1763class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1764 """PosixDatastore specialization of a butler using Postgres"""
1766 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1767 fullConfigKey = ".datastore.formatters"
1768 validationCanFail = True
1769 datastoreStr = ["/tmp"]
1770 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1771 registryStr = "PostgreSQL@test"
1772 postgresql: Any
1774 @staticmethod
1775 def _handler(postgresql: Any) -> None:
1776 engine = sqlalchemy.engine.create_engine(postgresql.url())
1777 with engine.begin() as connection:
1778 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))
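# (The btree_gist extension is assumed here to be required by the
# registry's timespan exclusion constraints, which the SQLite backend
# does not need; a hedged inference, not quoted from the schema docs.)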
1780 @classmethod
1781 def setUpClass(cls) -> None:
1782 # Create the postgres test server.
1783 cls.postgresql = testing.postgresql.PostgresqlFactory(
1784 cache_initialized_db=True, on_initialized=cls._handler
1785 )
1786 super().setUpClass()
1788 @classmethod
1789 def tearDownClass(cls) -> None:
1790 # Clean up any lingering SQLAlchemy engines/connections
1791 # so they're closed before we shut down the server.
1792 gc.collect()
1793 cls.postgresql.clear_cache()
1794 super().tearDownClass()
1796 def setUp(self) -> None:
1797 self.server = self.postgresql()
1799 # Need to add a registry section to the config.
1800 self._temp_config = False
1801 config = Config(self.configFile)
1802 config["registry", "db"] = self.server.url()
1803 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh:
1804 config.dump(fh)
1805 self.configFile = fh.name
1806 self._temp_config = True
1807 super().setUp()
1809 def tearDown(self) -> None:
1810 self.server.stop()
1811 if self._temp_config and os.path.exists(self.configFile):
1812 os.remove(self.configFile)
1813 super().tearDown()
1815 def testMakeRepo(self) -> None:
1816 # The base class test assumes that it's using sqlite and assumes
1817 # the config file is acceptable to sqlite.
1818 raise unittest.SkipTest("Postgres config is not compatible with this test.")
1821class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
1822 """InMemoryDatastore specialization of a butler"""
1824 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
1825 fullConfigKey = None
1826 useTempRoot = False
1827 validationCanFail = False
1828 datastoreStr = ["datastore='InMemory"]
1829 datastoreName = ["InMemoryDatastore@"]
1830 registryStr = "/gen3.sqlite3"
1832 def testIngest(self) -> None:
1833 pass
1836class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1837 """PosixDatastore specialization"""
1839 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
1840 fullConfigKey = ".datastore.datastores.1.formatters"
1841 validationCanFail = True
1842 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
1843 datastoreName = [
1844 "InMemoryDatastore@",
1845 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
1846 "SecondDatastore",
1847 ]
1848 registryStr = "/gen3.sqlite3"
1851class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
1852 """Test that a yaml file in one location can refer to a root in another."""
1854 datastoreStr = ["dir1"]
1855 # Disable the makeRepo test since we are deliberately not using
1856 # butler.yaml as the config name.
1857 fullConfigKey = None
1859 def setUp(self) -> None:
1860 self.root = makeTestTempDir(TESTDIR)
1862 # Make a new repository in one place
1863 self.dir1 = os.path.join(self.root, "dir1")
1864 Butler.makeRepo(self.dir1, config=Config(self.configFile))
1866 # Move the yaml file to a different place and add a "root"
1867 self.dir2 = os.path.join(self.root, "dir2")
1868 os.makedirs(self.dir2, exist_ok=True)
1869 configFile1 = os.path.join(self.dir1, "butler.yaml")
1870 config = Config(configFile1)
1871 config["root"] = self.dir1
1872 configFile2 = os.path.join(self.dir2, "butler2.yaml")
1873 config.dumpToUri(configFile2)
1874 os.remove(configFile1)
1875 self.tmpConfigFile = configFile2
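# (Illustrative sketch, not a verbatim dump: after the steps above,
# dir2/butler2.yaml is expected to contain an explicit root pointing
# back at dir1, e.g.
#
#     root: /path/to/dir1
#     datastore:
#       ...
#
# which is what lets the relocated config find the repository.)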
1877 def testFileLocations(self) -> None:
1878 self.assertNotEqual(self.dir1, self.dir2)
1879 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
1880 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
1881 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
1884class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
1885 """Test that a config file created by makeRepo outside of repo works."""
1887 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1889 def setUp(self) -> None:
1890 self.root = makeTestTempDir(TESTDIR)
1891 self.root2 = makeTestTempDir(TESTDIR)
1893 self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
1894 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1896 def tearDown(self) -> None:
1897 if os.path.exists(self.root2):
1898 shutil.rmtree(self.root2, ignore_errors=True)
1899 super().tearDown()
1901 def testConfigExistence(self) -> None:
1902 c = Config(self.tmpConfigFile)
1903 uri_config = ResourcePath(c["root"])
1904 uri_expected = ResourcePath(self.root, forceDirectory=True)
1905 self.assertEqual(uri_config.geturl(), uri_expected.geturl())
1906 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")
1908 def testPutGet(self) -> None:
1909 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1910 self.runPutGetTest(storageClass, "test_metric")
1913class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
1914 """Test that a config file created by makeRepo outside of repo works."""
1916 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1918 def setUp(self) -> None:
1919 self.root = makeTestTempDir(TESTDIR)
1920 self.root2 = makeTestTempDir(TESTDIR)
1922 self.tmpConfigFile = self.root2
1923 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1925 def testConfigExistence(self) -> None:
1926 # Append the yaml file else Config constructor does not know the file
1927 # type.
1928 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
1929 super().testConfigExistence()
1932class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
1933 """Test that a config file created by makeRepo outside of repo works."""
1935 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1937 def setUp(self) -> None:
1938 self.root = makeTestTempDir(TESTDIR)
1939 self.root2 = makeTestTempDir(TESTDIR)
1941 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl()
1942 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1945@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
1946class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1947 """S3Datastore specialization of a butler; an S3 storage Datastore +
1948 a local in-memory SqlRegistry.
1949 """
1951 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
1952 fullConfigKey = None
1953 validationCanFail = True
1955 bucketName = "anybucketname"
1956 """Name of the Bucket that will be used in the tests. The name is read from
1957 the config file used with the tests during set-up.
1958 """
1960 root = "butlerRoot/"
1961 """Root repository directory expected to be used in case useTempRoot=False.
1962 Otherwise the root is set to a 20 characters long randomly generated string
1963 during set-up.
1964 """
1966 datastoreStr = [f"datastore={root}"]
1967 """Contains all expected root locations in a format expected to be
1968 returned by Butler stringification.
1969 """
1971 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
1972 """The expected format of the S3 Datastore string."""
1974 registryStr = "/gen3.sqlite3"
1975 """Expected format of the Registry string."""
1977 mock_s3 = mock_s3()
1978 """The mocked s3 interface from moto."""
1980 def genRoot(self) -> str:
1981 """Return a random string of len 20 to serve as a root
1982 name for the temporary bucket repo.
1984 This is equivalent to tempfile.mkdtemp as this is what self.root
1985 becomes when useTempRoot is True.
1986 """
1987 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20))
1988 return rndstr + "/"
1990 def setUp(self) -> None:
1991 config = Config(self.configFile)
1992 uri = ResourcePath(config[".datastore.datastore.root"])
1993 self.bucketName = uri.netloc
1995 # Enable S3 mocking of tests.
1996 self.mock_s3.start()
1998 # set up some fake credentials if they do not exist
1999 self.usingDummyCredentials = setAwsEnvCredentials()
2001 if self.useTempRoot:
2002 self.root = self.genRoot()
2003 rooturi = f"s3://{self.bucketName}/{self.root}"
2004 config.update({"datastore": {"datastore": {"root": rooturi}}})
2006 # need local folder to store registry database
2007 self.reg_dir = makeTestTempDir(TESTDIR)
2008 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
2010 # Moto needs to know that we expect the bucket to exist
2011 # (the name used to be the class attribute bucketName).
2012 s3 = boto3.resource("s3")
2013 s3.create_bucket(Bucket=self.bucketName)
2015 self.datastoreStr = [f"datastore='{rooturi}'"]
2016 self.datastoreName = [f"FileDatastore@{rooturi}"]
2017 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
2018 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")
2020 def tearDown(self) -> None:
2021 s3 = boto3.resource("s3")
2022 bucket = s3.Bucket(self.bucketName)
2023 try:
2024 bucket.objects.all().delete()
2025 except botocore.exceptions.ClientError as e:
2026 if e.response["Error"]["Code"] == "404":
2027 # The key was not reachable; nothing to delete.
2028 pass
2029 else:
2030 raise
2032 bucket = s3.Bucket(self.bucketName)
2033 bucket.delete()
2035 # Stop the S3 mock.
2036 self.mock_s3.stop()
2038 # unset any potentially set dummy credentials
2039 if self.usingDummyCredentials:
2040 unsetAwsEnvCredentials()
2042 if self.reg_dir is not None and os.path.exists(self.reg_dir):
2043 shutil.rmtree(self.reg_dir, ignore_errors=True)
2045 if self.useTempRoot and os.path.exists(self.root):
2046 shutil.rmtree(self.root, ignore_errors=True)
2048 super().tearDown()
2051class PosixDatastoreTransfers(unittest.TestCase):
2052 """Test data transfers between butlers.
2054 Tests cover different dataset ID managers: UUID-to-UUID and
2055 integer-to-integer transfers are exercised. UUID-to-integer is not
2056 supported since we do not currently want to allow that. Integer-to-UUID
2057 is supported, with the caveat that a UUID4 will be generated, which is
2058 incorrect for raw dataset types; the tests ignore that.
2059 """
2061 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
2062 storageClassFactory: StorageClassFactory
2064 @classmethod
2065 def setUpClass(cls) -> None:
2066 cls.storageClassFactory = StorageClassFactory()
2067 cls.storageClassFactory.addFromConfig(cls.configFile)
2069 def setUp(self) -> None:
2070 self.root = makeTestTempDir(TESTDIR)
2071 self.config = Config(self.configFile)
2073 def tearDown(self) -> None:
2074 removeTestTempDir(self.root)
2076 def create_butler(self, manager: str, label: str) -> Butler:
2077 config = Config(self.configFile)
2078 config["registry", "managers", "datasets"] = manager
2079 return Butler.from_config(
2080 Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True
2081 )
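# A minimal sketch of the same override as a butler.yaml fragment
# (hedged: the key path is taken from the Config indexing above):
#
#     registry:
#       managers:
#         datasets: <fully qualified manager class name>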
2083 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None:
2084 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
2085 if manager1 is None:
2086 manager1 = default
2087 if manager2 is None:
2088 manager2 = default
2089 self.source_butler = self.create_butler(manager1, "1")
2090 self.target_butler = self.create_butler(manager2, "2")
2092 def testTransferUuidToUuid(self) -> None:
2093 self.create_butlers()
2094 self.assertButlerTransfers()
2096 def _enable_trust(self, datastore: Datastore) -> None:
2097 datastores = getattr(datastore, "datastores", [datastore])
2098 for this_datastore in datastores:
2099 if hasattr(this_datastore, "trustGetRequest"):
2100 this_datastore.trustGetRequest = True
2102 def testTransferMissing(self) -> None:
2103 """Test transfers where datastore records are missing.
2105 This is how execution butler works.
2106 """
2107 self.create_butlers()
2109 # Configure the source butler to allow trust.
2110 self._enable_trust(self.source_butler._datastore)
2112 self.assertButlerTransfers(purge=True)
2114 def testTransferMissingDisassembly(self) -> None:
2115 """Test transfers where datastore records are missing.
2117 This is how execution butler works.
2118 """
2119 self.create_butlers()
2121 # Configure the source butler to allow trust.
2122 self._enable_trust(self.source_butler._datastore)
2124 # Test disassembly.
2125 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite")
2127 def testAbsoluteURITransferDirect(self) -> None:
2128 """Test transfer using an absolute URI."""
2129 self._absolute_transfer("auto")
2131 def testAbsoluteURITransferCopy(self) -> None:
2132 """Test transfer using an absolute URI."""
2133 self._absolute_transfer("copy")
2135 def _absolute_transfer(self, transfer: str) -> None:
2136 self.create_butlers()
2138 storageClassName = "StructuredData"
2139 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
2140 datasetTypeName = "random_data"
2141 run = "run1"
2142 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
2144 dimensions = self.source_butler.dimensions.conform(())
2145 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2146 self.source_butler.registry.registerDatasetType(datasetType)
2148 metrics = makeExampleMetrics()
2149 with ResourcePath.temporary_uri(suffix=".json") as temp:
2150 dataId = DataCoordinate.make_empty(self.source_butler.dimensions)
2151 source_refs = [DatasetRef(datasetType, dataId, run=run)]
2152 temp.write(json.dumps(metrics.exportAsDict()).encode())
2153 dataset = FileDataset(path=temp, refs=source_refs)
2154 self.source_butler.ingest(dataset, transfer="direct")
2156 self.target_butler.transfer_from(
2157 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer
2158 )
2160 uri = self.target_butler.getURI(dataset.refs[0])
2161 if transfer == "auto":
2162 self.assertEqual(uri, temp)
2163 else:
2164 self.assertNotEqual(uri, temp)
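# (Hedged reading of the assertions above: for a direct-ingested
# dataset with an absolute URI, transfer="auto" leaves the target
# butler pointing at the original location, while transfer="copy"
# gives the target its own copy under its datastore root.)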
2166 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None:
2167 """Test that a run can be transferred to another butler."""
2168 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
2169 datasetTypeName = "random_data"
2171 # The test will create three collections, and we will transfer
2172 # datasets from two of those three.
2173 runs = ["run1", "run2", "other"]
2175 # We also use two different dataset types to ensure that
2176 # grouping works.
2177 datasetTypeNames = ["random_data", "random_data_2"]
2179 # Create the run collections in the source butler.
2180 for run in runs:
2181 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
2183 # Create dimensions in source butler.
2184 n_exposures = 30
2185 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
2186 self.source_butler.registry.insertDimensionData(
2187 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
2188 )
2189 self.source_butler.registry.insertDimensionData(
2190 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
2191 )
2193 for i in range(n_exposures):
2194 self.source_butler.registry.insertDimensionData(
2195 "exposure",
2196 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"},
2197 )
2199 # Create dataset types in the source butler.
2200 dimensions = self.source_butler.dimensions.conform(["instrument", "exposure"])
2201 for datasetTypeName in datasetTypeNames:
2202 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2203 self.source_butler.registry.registerDatasetType(datasetType)
2205 # Write a dataset to an unrelated run -- this will ensure that
2206 # we are rewriting integer dataset ids in the target if necessary.
2207 # Will not be relevant for UUID.
2208 run = "distraction"
2209 butler = Butler.from_config(butler=self.source_butler, run=run)
2210 butler.put(
2211 makeExampleMetrics(),
2212 datasetTypeName,
2213 exposure=1,
2214 instrument="DummyCamComp",
2215 physical_filter="d-r",
2216 )
2218 # Write some example metrics to the source
2219 butler = Butler.from_config(butler=self.source_butler)
2221 # Set of DatasetRefs that should be in the list of refs to transfer
2222 # but which will not be transferred.
2223 deleted: set[DatasetRef] = set()
2225 n_expected = 20 # Number of datasets expected to be transferred
2226 source_refs = []
2227 for i in range(n_exposures):
2228 # Put a third of the datasets into each collection; only retain
2229 # two thirds.
2230 index = i % 3
2231 run = runs[index]
2232 datasetTypeName = datasetTypeNames[i % 2]
2234 metric = MetricsExample(
2235 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)]
2236 )
2237 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"}
2238 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)
2240 # Remove the datastore record using low-level API, but only
2241 # for a specific index.
2242 if purge and index == 1:
2243 # For one of these delete the file as well.
2244 # This allows the "missing" code to filter the
2245 # file out.
2246 # Access the individual datastores.
2247 datastores = []
2248 if hasattr(butler._datastore, "datastores"):
2249 datastores.extend(butler._datastore.datastores)
2250 else:
2251 datastores.append(butler._datastore)
2253 if not deleted:
2254 # For a chained datastore we need to remove
2255 # files in each chain.
2256 for datastore in datastores:
2257 # The file might not be known to the datastore
2258 # if constraints are used.
2259 try:
2260 primary, uris = datastore.getURIs(ref)
2261 except FileNotFoundError:
2262 continue
2263 if primary and primary.scheme != "mem":
2264 primary.remove()
2265 for uri in uris.values():
2266 if uri.scheme != "mem":
2267 uri.remove()
2268 n_expected -= 1
2269 deleted.add(ref)
2271 # Remove the datastore record.
2272 for datastore in datastores:
2273 if hasattr(datastore, "removeStoredItemInfo"):
2274 datastore.removeStoredItemInfo(ref)
2276 if index < 2:
2277 source_refs.append(ref)
2278 if ref not in deleted:
2279 new_metric = butler.get(ref)
2280 self.assertEqual(new_metric, metric)
2282 # Create some bad dataset types to ensure we check for inconsistent
2283 # definitions.
2284 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList")
2285 for datasetTypeName in datasetTypeNames:
2286 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass)
2287 self.target_butler.registry.registerDatasetType(datasetType)
2288 with self.assertRaises(ConflictingDefinitionError) as cm:
2289 self.target_butler.transfer_from(self.source_butler, source_refs)
2290 self.assertIn("dataset type differs", str(cm.exception))
2292 # And remove the bad definitions.
2293 for datasetTypeName in datasetTypeNames:
2294 self.target_butler.registry.removeDatasetType(datasetTypeName)
2296 # Transfer without creating dataset types should fail.
2297 with self.assertRaises(KeyError):
2298 self.target_butler.transfer_from(self.source_butler, source_refs)
2300 # Transfer without creating dimensions should fail.
2301 with self.assertRaises(ConflictingDefinitionError) as cm:
2302 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True)
2303 self.assertIn("dimension", str(cm.exception))
2305 # The failed transfer above leaves registry in an inconsistent
2306 # state because the run is created but then rolled back without
2307 # the collection cache being cleared. For now force a refresh.
2308 # Can remove with DM-35498.
2309 self.target_butler.registry.refresh()
2311 # Now transfer them to the second butler, including dimensions.
2312 with self.assertLogs(logger="lsst", level=logging.DEBUG) as log_cm:
2313 transferred = self.target_butler.transfer_from(
2314 self.source_butler,
2315 source_refs,
2316 register_dataset_types=True,
2317 transfer_dimensions=True,
2318 )
2319 self.assertEqual(len(transferred), n_expected)
2320 log_output = ";".join(log_cm.output)
2322 # A ChainedDatastore will use the in-memory datastore for mexists,
2323 # so we cannot rely on the mexists log message.
2324 self.assertIn("Number of datastore records found in source", log_output)
2325 self.assertIn("Creating output run", log_output)
2327 # Do the transfer twice to ensure that it will do nothing extra.
2328 # Only do this if purge=True because it does not work for int
2329 # dataset_id.
2330 if purge:
2331 # This should not need to register dataset types.
2332 transferred = self.target_butler.transfer_from(self.source_butler, source_refs)
2333 self.assertEqual(len(transferred), n_expected)
2335 # Also do an explicit low-level transfer to trigger some
2336 # edge cases.
2337 with self.assertLogs(level=logging.DEBUG) as log_cm:
2338 self.target_butler._datastore.transfer_from(self.source_butler._datastore, source_refs)
2339 log_output = ";".join(log_cm.output)
2340 self.assertIn("no file artifacts exist", log_output)
2342 with self.assertRaises((TypeError, AttributeError)):
2343 self.target_butler._datastore.transfer_from(self.source_butler, source_refs) # type: ignore
2345 with self.assertRaises(ValueError):
2346 self.target_butler._datastore.transfer_from(
2347 self.source_butler._datastore, source_refs, transfer="split"
2348 )
2350 # Now try to get the same refs from the new butler.
2351 for ref in source_refs:
2352 if ref not in deleted:
2353 new_metric = self.target_butler.get(ref)
2354 old_metric = self.source_butler.get(ref)
2355 self.assertEqual(new_metric, old_metric)
2357 # Now prune the run2 collection and create a CHAINED collection
2358 # instead. This should block the transfer.
2359 self.target_butler.removeRuns(["run2"], unstore=True)
2360 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED)
2361 with self.assertRaises(CollectionTypeError):
2362 # Re-importing the run1 datasets can be problematic if they
2363 # use integer IDs so filter those out.
2364 to_transfer = [ref for ref in source_refs if ref.run == "run2"]
2365 self.target_butler.transfer_from(self.source_butler, to_transfer)
2368class ChainedDatastoreTransfers(PosixDatastoreTransfers):
2369 """Test transfers using a chained datastore."""
2371 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
2374class NullDatastoreTestCase(unittest.TestCase):
2375 """Test that we can fall back to a null datastore."""
2377 # Need a good config to create the repo.
2378 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
2379 storageClassFactory: StorageClassFactory
2381 @classmethod
2382 def setUpClass(cls) -> None:
2383 cls.storageClassFactory = StorageClassFactory()
2384 cls.storageClassFactory.addFromConfig(cls.configFile)
2386 def setUp(self) -> None:
2387 """Create a new butler root for each test."""
2388 self.root = makeTestTempDir(TESTDIR)
2389 Butler.makeRepo(self.root, config=Config(self.configFile))
2391 def tearDown(self) -> None:
2392 removeTestTempDir(self.root)
2394 def test_fallback(self) -> None:
2395 # Read the butler config and mess with the datastore section.
2396 bad_config = Config(os.path.join(self.root, "butler.yaml"))
2397 bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore"
2399 with self.assertRaises(RuntimeError):
2400 Butler.from_config(bad_config)
2402 butler = Butler.from_config(bad_config, writeable=True, without_datastore=True)
2403 self.assertIsInstance(butler._datastore, NullDatastore)
2405 # Check that registry is working.
2406 butler.registry.registerRun("MYRUN")
2407 collections = butler.registry.queryCollections(...)
2408 self.assertIn("MYRUN", set(collections))
2410 # Create a ref.
2411 dimensions = butler.dimensions.conform([])
2412 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
2413 datasetTypeName = "metric"
2414 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2415 butler.registry.registerDatasetType(datasetType)
2416 ref = DatasetRef(datasetType, {}, run="MYRUN")
2418 # Check that datastore will complain.
2419 with self.assertRaises(FileNotFoundError):
2420 butler.get(ref)
2421 with self.assertRaises(FileNotFoundError):
2422 butler.getURI(ref)
2425def setup_module(module: types.ModuleType) -> None:
2426 """Set up the module for pytest."""
2427 clean_environment()
2430if __name__ == "__main__":
2431 clean_environment()
2432 unittest.main()