# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

from __future__ import annotations

import gc
import json
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock
import uuid
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, cast

try:
    import boto3
    import botocore
    from lsst.resources.s3utils import clean_test_environment_for_s3

    try:
        from moto import mock_aws  # v5
    except ImportError:
        from moto import mock_s3 as mock_aws
except ImportError:
    boto3 = None

    def mock_aws(*args: Any, **kwargs: Any) -> Any:  # type: ignore[no-untyped-def]
        """No-op decorator in case moto mock_aws cannot be imported."""
        return None
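    # This stub only keeps the module importable when moto/boto3 are absent;
    # S3-backed test cases are expected to be skipped in that situation.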


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    ButlerRepoIndex,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetExistence,
    DatasetRef,
    DatasetType,
    FileDataset,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.datastore import NullDatastore
from lsst.daf.butler.datastore.file_templates import FileTemplate, FileTemplateValidationError
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.direct_butler import DirectButler
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.registry.sql_registry import SqlRegistry
from lsst.daf.butler.repo_relocation import BUTLER_ROOT_TAG
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    import types

    from lsst.daf.butler import DimensionGroup, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def clean_environment() -> None:
    """Remove external environment variables that affect the tests."""
    for k in ("DAF_BUTLER_REPOSITORY_INDEX",):
        os.environ.pop(k, None)


def makeExampleMetrics() -> MetricsExample:
    """Return example dataset suitable for tests."""
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in any other test
    cases.
    """

    def testSearchPath(self) -> None:
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests(TestCaseMixin):
    """Helper methods for running a suite of put/get tests from different
    butler configurations.
    """

    root: str
    default_run = "ingésτ😺"
    storageClassFactory: StorageClassFactory
    configFile: str
    tmpConfigFile: str

    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGroup, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(
        self,
        butler: Butler,
        datasetRef: DatasetRef,
        components: tuple[str, ...],
        reference: Any,
        collections: Any = None,
    ) -> None:
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[DirectButler, DatasetType]:
        butler = Butler.from_config(self.tmpConfigFile, run=run)
        assert isinstance(butler, DirectButler), "Expect DirectButler in configuration"
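        # A plain assert (rather than assertIsInstance) also narrows the
        # static type to DirectButler for mypy in the code below.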

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run})

        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> DirectButler:
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]
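        # Annotation-only declaration: "args" is bound by the for loop below.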
        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                kwargs: dict[str, Any] = {}
                if not isinstance(args[0], DatasetRef):  # type: ignore
                    kwargs["run"] = this_run
                ref = butler.put(metric, *args, **kwargs)
                self.assertIsInstance(ref, DatasetRef)

                # Test get of a ref.
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                primary_uri, secondary_uris = butler.getURIs(ref)
                n_uris = len(secondary_uris)
                if primary_uri:
                    n_uris += 1

                # Can the artifacts themselves be retrieved?
                if not butler._datastore.isEphemeral:
                    # Create a temporary directory to hold the retrieved
                    # artifacts.
                    with tempfile.TemporaryDirectory(
                        prefix="butler-artifacts-", ignore_cleanup_errors=True
                    ) as artifact_root:
                        root_uri = ResourcePath(artifact_root, forceDirectory=True)

                        for preserve_path in (True, False):
                            destination = root_uri.join(f"{preserve_path}_{counter}/")
                            log = logging.getLogger("lsst.x")
                            log.warning("Using destination %s for args %s", destination, args)
                            # Use copy so that we can test that overwrite
                            # protection works (using "auto" for File URIs
                            # would use hard links and subsequent transfer
                            # would work because it knows they are the same
                            # file).
                            transferred = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, transfer="copy"
                            )
                            self.assertGreater(len(transferred), 0)
                            artifacts = list(ResourcePath.findFileResources([destination]))
                            self.assertEqual(set(transferred), set(artifacts))

                            for artifact in transferred:
                                path_in_destination = artifact.relative_to(destination)
                                self.assertIsNotNone(path_in_destination)
                                assert path_in_destination is not None
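                                # The bare assert narrows the Optional type
                                # for mypy; assertIsNotNone alone does not.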

                                # When path is not preserved there should not
                                # be any path separators.
                                num_seps = path_in_destination.count("/")
                                if preserve_path:
                                    self.assertGreater(num_seps, 0)
                                else:
                                    self.assertEqual(num_seps, 0)

                            self.assertEqual(
                                len(artifacts),
                                n_uris,
                                "Comparing expected artifacts vs actual:"
                                f" {artifacts} vs {primary_uri} and {secondary_uris}",
                            )

                            if preserve_path:
                                # No need to run these twice
                                with self.assertRaises(ValueError):
                                    butler.retrieveArtifacts([ref], destination, transfer="move")

                                with self.assertRaises(FileExistsError):
                                    butler.retrieveArtifacts([ref], destination)

                                transferred_again = butler.retrieveArtifacts(
                                    [ref], destination, preserve_path=preserve_path, overwrite=True
                                )
                                self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                kwargs = {"collections": this_run}
                if isinstance(args[0], DatasetRef):
                    kwargs = {}  # Prevent warning from being issued.
                self.assertFalse(butler.exists(*args, **kwargs))
                # get() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.get_dataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Create DatasetRef for put using default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Check that getDeferred fails with standalone ref.
        with self.assertRaises(LookupError):
            butler.getDeferred(refIn)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        assert metric.data is not None  # for mypy
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.find_dataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises(FileNotFoundError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add the same ref again, so we can check that duplicate put fails.
        ref = butler.put(metric, datasetType, dataId)

        # Repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # In case of resolved ref this write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present
        return butler

    def testDeferredCollectionPassing(self) -> None:
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler.from_config(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run and no active collection should fail with
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to get the dataset without any collection raises
        # CollectionError, and exists() reports it as not found.
        self.assertFalse(butler.exists(datasetType, dataId))
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should still leave it
        # findable in the original run collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True
    validationCanFail: bool
    fullConfigKey: str | None
    registryStr: str | None
    datastoreName: list[str] | None
    datastoreStr: list[str]

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self) -> None:
        """Independent test of constructor."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler.from_config(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler.from_config(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler.from_config(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler.from_config(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertEqual(type(butler._datastore), type(butler2._datastore))
        self.assertEqual(butler._datastore.config, butler2._datastore.config)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"file://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler.from_config(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler.from_config("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler.from_config("not_there", writeable=False)
                    with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"):
                        Butler.from_config("bad_label")
                    with self.assertRaises(FileNotFoundError):
                        # Should ignore aliases.
                        Butler.from_config(ResourcePath("label", forceAbsolute=False))
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(
                        Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False)
                    )
                    self.assertIn("not known to", str(cm.exception))
                    # Should report no failure.
                    self.assertEqual(ButlerRepoIndex.get_failure_reason(), "")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with empty configuration.
                butler_index = Config()
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"):
                        Butler.from_config("label")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with bad contents.
                with open(temp_file.ospath, "w") as fh:
                    print("'", file=fh)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"):
                        Butler.from_config("label")
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())

            with self.assertRaisesRegex(FileNotFoundError, "index file not found"):
                Butler.from_config("label")

        # Check that we can create Butler when the alias file is not found.
        butler = Butler.from_config(self.tmpConfigFile, writeable=False)
        self.assertIsInstance(butler, Butler)
        with self.assertRaises(RuntimeError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"):
            # No aliases registered.
            Butler.from_config("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testDafButlerRepositories(self) -> None:
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_REPOSITORIES": "label: 'https://someuri.com'\notherLabel: 'https://otheruri.com'\n"},
        ):
            self.assertEqual(str(Butler.get_repo_uri("label")), "https://someuri.com")

        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_REPOSITORIES": "label: https://someuri.com",
                "DAF_BUTLER_REPOSITORY_INDEX": "https://someuri.com",
            },
        ):
            with self.assertRaisesRegex(RuntimeError, "Only one of the environment variables"):
                Butler.get_repo_uri("label")

        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_REPOSITORIES": "invalid"},
        ):
            with self.assertRaisesRegex(ValueError, "Repository index not in expected format"):
                Butler.get_repo_uri("label")

    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler._datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler._datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
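        # The visit=424 keyword argument overrides the visit value given in
        # the dataId mapping above.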
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.get_dataset_type(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the compatible but different dataset type
        # definition, this time via dataset type and data ID rather than a
        # ref. This should be consistent with the ref-based get() behavior
        # and return the python type of the given DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self) -> None:
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.dimensions.conform(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            # For first ingest use copy.
            butler.ingest(*datasets, transfer="copy", record_validation_info=False)

            # Now try to ingest again in "execution butler" mode where
            # the registry entries exist but the datastore does not have
            # the files. We also need to strip the dimension records to ensure
            # that they will be re-added by the ingest.
            ref = datasets[0].refs[0]
            datasets[0].refs = [
                cast(
                    DatasetRef,
                    butler.find_dataset(ref.datasetType, data_id=ref.dataId, collections=ref.run),
                )
                for ref in datasets[0].refs
            ]
            all_refs = []
            for dataset in datasets:
                refs = []
                for ref in dataset.refs:
                    # Create a dict from the dataId to drop the records.
                    new_data_id = dict(ref.dataId.required)
                    new_ref = butler.find_dataset(ref.datasetType, new_data_id, collections=ref.run)
                    assert new_ref is not None
                    self.assertFalse(new_ref.dataId.hasRecords())
                    refs.append(new_ref)
                dataset.refs = refs
                all_refs.extend(dataset.refs)
            butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False)

            # Use move mode to test that the file is deleted. Also
            # disable recording of file size.
            butler.ingest(*datasets, transfer="move", record_validation_info=False)

            # Check that every ref now has records.
            for dataset in datasets:
                for ref in dataset.refs:
                    self.assertTrue(ref.dataId.hasRecords())

            # Ensure that the file has disappeared.
            self.assertFalse(tempFile.exists())

            # Check that the datastore recorded no file size.
            # Not all datastores can support this.
            try:
                infos = butler._datastore.getStoredItemsInfo(datasets[0].refs[0])  # type: ignore[attr-defined]
                self.assertEqual(infos[0].file_size, -1)
            except AttributeError:
                pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since an in-memory datastore cannot
        # ingest files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.exists(datasetTypeName, dataId1))
        self.assertTrue(butler.exists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets
        datasets = []
        butler.ingest(*datasets)

    def testPickle(self) -> None:
        """Test pickle support."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        assert isinstance(butler, DirectButler), "Expect DirectButler in configuration"
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self) -> None:
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.dimensions.conform(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry, entries are not created
        # for components, but querying can still return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self) -> None:
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get of a ref.
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
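        # Everything inserted or stored inside the transaction above should
        # now have been rolled back, as the checks below verify.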
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.find_dataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)

    def testMakeRepo(self) -> None:
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler.from_config(butlerConfig)
        assert isinstance(butler1, DirectButler), "Expect DirectButler in configuration"
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler.from_config(butlerConfig)
        assert isinstance(butler2, DirectButler), "Expect DirectButler in configuration"
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler.from_config(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self) -> None:
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler._datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self) -> None:
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.dimensions.conform(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)

    def testGetDatasetCollectionCaching(self) -> None:
        # Prior to DM-41117, there was a bug where get_dataset would throw
        # MissingCollectionError if you tried to fetch a dataset that was added
        # after the collection cache was last updated.
        reader_butler, datasetType = self.create_butler(self.default_run, "int", "datasettypename")
        writer_butler = Butler.from_config(self.tmpConfigFile, writeable=True, run="new_run")
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        put_ref = writer_butler.put(123, datasetType, dataId)
        get_ref = reader_butler.get_dataset(put_ref.id)
        self.assertEqual(get_ref.id, put_ref.id)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool:
        """Check if a file exists at the given path (relative to root).

        The test testPutTemplates verifies the actual physical existence of
        the files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.dimensions.conform(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")
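        # The ":?" conversion marks the field as optional, so the misspelled
        # "visit.namex" is dropped from the formatted path (leaving "_fits")
        # with only a log message instead of an error.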

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass: StorageClass) -> None:
        """Test exporting and importing.

        This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")

        # Test that we must have a file extension.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump", directory=".") as export:
                pass

        # Test that unknown format is not allowed.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump.fits", directory=".") as export:
                pass

        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
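        # The skymap "hash" value must be bytes; any stable byte string
        # suffices for this test.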
1397 exportButler.registry.insertDimensionData("skymap", skymapRecord)
1398 # Export and then import datasets.
1399 with safeTestTempDir(TESTDIR) as exportDir:
1400 exportFile = os.path.join(exportDir, "exports.yaml")
1401 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
1402 export.saveDatasets(datasets)
1403 # Export the same datasets again. This should quietly do
1404 # nothing because of internal deduplication, and it shouldn't
1405 # complain about being asked to export the "htm7" elements even
1406 # though there aren't any in these datasets or in the database.
1407 export.saveDatasets(datasets, elements=["htm7"])
1408 # Save one of the data IDs again; this should be harmless
1409 # because of internal deduplication.
1410 export.saveDataIds([datasets[0].dataId])
1411 # Save some dimension records directly.
1412 export.saveDimensionData("skymap", [skymapRecord])
1413 self.assertTrue(os.path.exists(exportFile))
1414 with safeTestTempDir(TESTDIR) as importDir:
1415 # We always want this to be a local posix butler
1416 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
1417 # Calling script.butlerImport tests the implementation of the
1418 # butler command line interface "import" subcommand. Functions
1419 # in the script folder are generally considered protected and
1420 # should not be used as public api.
1421 with open(exportFile) as f:
1422 script.butlerImport(
1423 importDir,
1424 export_file=f,
1425 directory=exportDir,
1426 transfer="auto",
1427 skip_dimensions=None,
1428 )
1429 importButler = Butler.from_config(importDir, run=self.default_run)
1430 for ref in datasets:
1431 with self.subTest(ref=ref):
1432 # Test for existence by passing in the DatasetType and
1433 # data ID separately, to avoid lookup by dataset_id.
1434 self.assertTrue(importButler.exists(ref.datasetType, ref.dataId))
1435 self.assertEqual(
1436 list(importButler.registry.queryDimensionRecords("skymap")),
1437 [importButler.dimensions["skymap"].RecordClass(**skymapRecord)],
1438 )
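# The essential export/import round trip exercised above, reduced to a
# sketch (paths are illustrative):
#
#     with exportButler.export(filename=exportFile, directory=exportDir,
#                              transfer="auto") as export:
#         export.saveDatasets(datasets)
#         export.saveDimensionData("skymap", [skymapRecord])
#     with open(exportFile) as f:
#         script.butlerImport(importDir, export_file=f, directory=exportDir,
#                             transfer="auto", skip_dimensions=None)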
1440 def testRemoveRuns(self) -> None:
1441 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1442 butler = Butler.from_config(self.tmpConfigFile, writeable=True)
1443 # Load registry data with dimensions to hang datasets off of.
1444 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
1445 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
1446 # Add some RUN-type collection.
1447 run1 = "run1"
1448 butler.registry.registerRun(run1)
1449 run2 = "run2"
1450 butler.registry.registerRun(run2)
1451 # put a dataset in each
1452 metric = makeExampleMetrics()
1453 dimensions = butler.dimensions.conform(["instrument", "physical_filter"])
1454 datasetType = self.addDatasetType(
1455 "prune_collections_test_dataset", dimensions, storageClass, butler.registry
1456 )
1457 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
1458 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
1459 uri1 = butler.getURI(ref1)
1460 uri2 = butler.getURI(ref2)
1462 with self.assertRaises(OrphanedRecordError):
1463 butler.registry.removeDatasetType(datasetType.name)
1465 # Remove from both runs with different values for unstore.
1466 butler.removeRuns([run1], unstore=True)
1467 butler.removeRuns([run2], unstore=False)
1468 # Should be nothing in registry for either one, and datastore should
1469 # not think either exists.
1470 with self.assertRaises(MissingCollectionError):
1471 butler.registry.getCollectionType(run1)
1472 with self.assertRaises(MissingCollectionError):
1473 butler.registry.getCollectionType(run2)
1474 self.assertFalse(butler.stored(ref1))
1475 self.assertFalse(butler.stored(ref2))
1476 # The ref we unstored should be gone according to the URI, but the
1477 # one we forgot should still be around.
1478 self.assertFalse(uri1.exists())
1479 self.assertTrue(uri2.exists())
1481 # Now that the collections have been pruned we can remove the
1482 # dataset type
1483 butler.registry.removeDatasetType(datasetType.name)
1485 with self.assertLogs("lsst.daf.butler.registry", "INFO") as cm:
1486 butler.registry.removeDatasetType(("test*", "test*"))
1487 self.assertIn("not defined", "\n".join(cm.output))
1490class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1491 """PosixDatastore specialization of a butler"""
1493 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1494 fullConfigKey: str | None = ".datastore.formatters"
1495 validationCanFail = True
1496 datastoreStr = ["/tmp"]
1497 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1498 registryStr = "/gen3.sqlite3"
1500 def testPathConstructor(self) -> None:
1501 """Independent test of constructor using PathLike."""
1502 butler = Butler.from_config(self.tmpConfigFile, run=self.default_run)
1503 self.assertIsInstance(butler, Butler)
1505 # And again with a Path object with the butler yaml
1506 path = pathlib.Path(self.tmpConfigFile)
1507 butler = Butler.from_config(path, writeable=False)
1508 self.assertIsInstance(butler, Butler)
1510 # And again with a Path object without the butler yaml
1511 # (making sure we skip it if the tmp config doesn't end
1512 # in butler.yaml -- which is the case for a subclass)
1513 if self.tmpConfigFile.endswith("butler.yaml"):
1514 path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
1515 butler = Butler.from_config(path, writeable=False)
1516 self.assertIsInstance(butler, Butler)
1518 def testExportTransferCopy(self) -> None:
1519 """Test local export using all transfer modes"""
1520 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1521 exportButler = self.runPutGetTest(storageClass, "test_metric")
1522 # Test that the repo actually has at least one dataset.
1523 datasets = list(exportButler.registry.queryDatasets(..., collections=...))
1524 self.assertGreater(len(datasets), 0)
1525 uris = [exportButler.getURI(d) for d in datasets]
1526 assert isinstance(exportButler._datastore, FileDatastore)
1527 datastoreRoot = exportButler.get_datastore_roots()[exportButler.get_datastore_names()[0]]
1529 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]
1531 for path in pathsInStore:
1532 # Assume local file system
1533 assert path is not None
1534 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}")
1536 for transfer in ("copy", "link", "symlink", "relsymlink"):
1537 with safeTestTempDir(TESTDIR) as exportDir:
1538 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export:
1539 export.saveDatasets(datasets)
1540 for path in pathsInStore:
1541 assert path is not None
1542 self.assertTrue(
1543 self.checkFileExists(exportDir, path),
1544 f"Check that mode {transfer} exported files",
1545 )
1547 def testPruneDatasets(self) -> None:
1548 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1549 butler = Butler.from_config(self.tmpConfigFile, writeable=True)
1550 assert isinstance(butler._datastore, FileDatastore)
1551 # Load registry data with dimensions to hang datasets off of.
1552 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry"))
1553 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
1554 # Add some RUN-type collections.
1555 run1 = "run1"
1556 butler.registry.registerRun(run1)
1557 run2 = "run2"
1558 butler.registry.registerRun(run2)
1559 # put some datasets. ref1 and ref2 have the same data ID, and are in
1560 # different runs. ref3 has a different data ID.
1561 metric = makeExampleMetrics()
1562 dimensions = butler.dimensions.conform(["instrument", "physical_filter"])
1563 datasetType = self.addDatasetType(
1564 "prune_collections_test_dataset", dimensions, storageClass, butler.registry
1565 )
1566 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
1567 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
1568 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
1570 many_stored = butler.stored_many([ref1, ref2, ref3])
1571 for ref, stored in many_stored.items():
1572 self.assertTrue(stored, f"Ref {ref} should be stored")
1574 many_exists = butler._exists_many([ref1, ref2, ref3])
1575 for ref, exists in many_exists.items():
1576 self.assertTrue(exists, f"Checking ref {ref} exists.")
1577 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored")
1579 # Simple prune.
1580 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
1581 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1))
1583 many_stored = butler.stored_many([ref1, ref2, ref3])
1584 for ref, stored in many_stored.items():
1585 self.assertFalse(stored, f"Ref {ref} should not be stored")
1587 many_exists = butler._exists_many([ref1, ref2, ref3])
1588 for ref, exists in many_exists.items():
1589 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored")
1591 # Put data back.
1592 ref1_new = butler.put(metric, ref1)
1593 self.assertEqual(ref1_new, ref1) # Reuses original ID.
1594 ref2 = butler.put(metric, ref2)
1596 many_stored = butler.stored_many([ref1, ref2, ref3])
1597 self.assertTrue(many_stored[ref1])
1598 self.assertTrue(many_stored[ref2])
1599 self.assertFalse(many_stored[ref3])
1601 ref3 = butler.put(metric, ref3)
1603 many_exists = butler._exists_many([ref1, ref2, ref3])
1604 for ref, exists in many_exists.items():
1605 self.assertTrue(exists, f"Ref {ref} should be stored")
1607 # Clear out the datasets from registry and start again.
1608 refs = [ref1, ref2, ref3]
1609 butler.pruneDatasets(refs, purge=True, unstore=True)
1610 for ref in refs:
1611 butler.put(metric, ref)
1613 # Confirm we can retrieve deferred.
1614 dref1 = butler.getDeferred(ref1) # known and exists
1615 metric1 = dref1.get()
1616 self.assertEqual(metric1, metric)
1618 # Test different forms of file availability.
1619 # Need to be in a state where:
1620 # - one ref just has registry record.
1621 # - one ref has a missing file but a datastore record.
1622 # - one ref has a missing datastore record but file is there.
1623 # - one ref does not exist anywhere.
1624 # Do not need to test a ref that has everything since that is tested
1625 # above.
1626 ref0 = DatasetRef(
1627 datasetType,
1628 DataCoordinate.standardize(
1629 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions
1630 ),
1631 run=run1,
1632 )
1634 # Delete from datastore and retain in Registry.
1635 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False)
1637 # File has been removed.
1638 uri2 = butler.getURI(ref2)
1639 uri2.remove()
1641 # Datastore has lost track.
1642 butler._datastore.forget([ref3])
1644 # First test with a standard butler.
1645 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
1646 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1647 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED)
1648 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
1649 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED)
1651 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False)
1652 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1653 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED)
1654 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN)
1655 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED)
1656 self.assertTrue(exists_many[ref2])
1658 # Check that per-ref query gives the same answer as many query.
1659 for ref, exists in exists_many.items():
1660 self.assertEqual(butler.exists(ref, full_check=False), exists)
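# As a decoding aid (a sketch assuming standard enum.Flag semantics for
# DatasetExistence): the composite values above can be probed bitwise,
# e.g.
#
#     bool(exists & DatasetExistence.DATASTORE)  # datastore record present
#     exists == DatasetExistence.VERIFIED        # registry + datastore + artifact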
1662 # Get deferred checks for existence before it allows it to be
1663 # retrieved.
1664 with self.assertRaises(LookupError):
1665 butler.getDeferred(ref3) # not known, file exists
1666 dref2 = butler.getDeferred(ref2) # known but file missing
1667 with self.assertRaises(FileNotFoundError):
1668 dref2.get()
1670 # Test again with a trusting butler.
1671 butler._datastore.trustGetRequest = True
1672 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
1673 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1674 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED)
1675 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
1676 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT)
1678 # When trusting we can get a deferred dataset handle that is not
1679 # known but does exist.
1680 dref3 = butler.getDeferred(ref3)
1681 metric3 = dref3.get()
1682 self.assertEqual(metric3, metric)
1684 # Check that per-ref query gives the same answer as many query.
1685 for ref, exists in exists_many.items():
1686 self.assertEqual(butler.exists(ref, full_check=True), exists)
1688 # Create a ref that surprisingly has the UUID of an existing ref
1689 # but is not the same.
1690 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id)
1691 with self.assertRaises(ValueError):
1692 butler.exists(ref_bad)
1694 # Create a ref that has a compatible storage class.
1695 ref_compat = ref2.overrideStorageClass("StructuredDataDict")
1696 exists = butler.exists(ref_compat)
1697 self.assertEqual(exists, exists_many[ref2])
1699 # Remove everything and start from scratch.
1700 butler._datastore.trustGetRequest = False
1701 butler.pruneDatasets(refs, purge=True, unstore=True)
1702 for ref in refs:
1703 butler.put(metric, ref)
1705 # These tests mess directly with the trash table and can leave the
1706 # datastore in an odd state. Do them at the end.
1707 # Check that in normal mode, deleting the record first means that
1708 # emptying the trash will not touch the file.
1709 uri1 = butler.getURI(ref1)
1710 butler._datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table
1711 butler._datastore.forget([ref1])
1712 butler._datastore.trash(ref1)
1713 butler._datastore.emptyTrash()
1714 self.assertTrue(uri1.exists())
1715 uri1.remove() # Clean it up.
1717 # Simulate execution butler setup by deleting the datastore
1718 # record but keeping the file around and trusting.
1719 butler._datastore.trustGetRequest = True
1720 uris = butler.get_many_uris([ref2, ref3])
1721 uri2 = uris[ref2].primaryURI
1722 uri3 = uris[ref3].primaryURI
1723 self.assertTrue(uri2.exists())
1724 self.assertTrue(uri3.exists())
1726 # Remove the datastore record.
1727 butler._datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table
1728 butler._datastore.forget([ref2])
1729 self.assertTrue(uri2.exists())
1730 butler._datastore.trash([ref2, ref3])
1731 # Immediate removal for ref2 file
1732 self.assertFalse(uri2.exists())
1733 # But ref3 has to wait for the empty.
1734 self.assertTrue(uri3.exists())
1735 butler._datastore.emptyTrash()
1736 self.assertFalse(uri3.exists())
1738 # Clear out the datasets from registry.
1739 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
1741 def testPytypeCoercion(self) -> None:
1742 """Test python type coercion on Butler.get and put."""
1743 # Store some data with the normal example storage class.
1744 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1745 datasetTypeName = "test_metric"
1746 butler = self.runPutGetTest(storageClass, datasetTypeName)
1748 dataId = {"instrument": "DummyCamComp", "visit": 423}
1749 metric = butler.get(datasetTypeName, dataId=dataId)
1750 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample")
1752 datasetType_ori = butler.get_dataset_type(datasetTypeName)
1753 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents")
1755 # Now need to hack the registry dataset type definition.
1756 # There is no API for this.
1757 assert isinstance(butler._registry, SqlRegistry)
1758 manager = butler._registry._managers.datasets
1759 assert hasattr(manager, "_db") and hasattr(manager, "_static")
1760 manager._db.update(
1761 manager._static.dataset_type,
1762 {"name": datasetTypeName},
1763 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"},
1764 )
1766 # Force reset of dataset type cache
1767 butler.registry.refresh()
1769 datasetType_new = butler.get_dataset_type(datasetTypeName)
1770 self.assertEqual(datasetType_new.name, datasetType_ori.name)
1771 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel")
1773 metric_model = butler.get(datasetTypeName, dataId=dataId)
1774 self.assertNotEqual(type(metric_model), type(metric))
1775 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel")
1777 # Put the model and read it back to show that everything now
1778 # works as normal.
1779 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424)
1780 metric_model_new = butler.get(metric_ref)
1781 self.assertEqual(metric_model_new, metric_model)
1783 # Hack the storage class again to something that will fail on the
1784 # get with no conversion class.
1785 manager._db.update(
1786 manager._static.dataset_type,
1787 {"name": datasetTypeName},
1788 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"},
1789 )
1790 butler.registry.refresh()
1792 with self.assertRaises(ValueError):
1793 butler.get(datasetTypeName, dataId=dataId)
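def _sketch_storage_class_override(butler: Butler, ref: DatasetRef) -> Any:
    """Sketch of per-ref storage class conversion; this helper is not used
    by the tests, and "StructuredDataDict" here echoes the compatible
    storage class used in testPruneDatasets above.
    """
    # A compatible storage class can be requested on a per-ref basis; the
    # returned python object is then the converted type.
    ref_compat = ref.overrideStorageClass("StructuredDataDict")
    return butler.get(ref_compat)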
1796@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
1797class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1798 """PosixDatastore specialization of a butler using Postgres"""
1800 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1801 fullConfigKey = ".datastore.formatters"
1802 validationCanFail = True
1803 datastoreStr = ["/tmp"]
1804 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1805 registryStr = "PostgreSQL@test"
1806 postgresql: Any
1808 @staticmethod
1809 def _handler(postgresql: Any) -> None:
1810 engine = sqlalchemy.engine.create_engine(postgresql.url())
1811 with engine.begin() as connection:
1812 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))
1814 @classmethod
1815 def setUpClass(cls) -> None:
1816 # Create the postgres test server.
1817 cls.postgresql = testing.postgresql.PostgresqlFactory(
1818 cache_initialized_db=True, on_initialized=cls._handler
1819 )
1820 super().setUpClass()
1822 @classmethod
1823 def tearDownClass(cls) -> None:
1824 # Clean up any lingering SQLAlchemy engines/connections
1825 # so they're closed before we shut down the server.
1826 gc.collect()
1827 cls.postgresql.clear_cache()
1828 super().tearDownClass()
1830 def setUp(self) -> None:
1831 self.server = self.postgresql()
1833 # Need to add a registry section to the config.
1834 self._temp_config = False
1835 config = Config(self.configFile)
1836 config["registry", "db"] = self.server.url()
1837 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh:
1838 config.dump(fh)
1839 self.configFile = fh.name
1840 self._temp_config = True
1841 super().setUp()
1843 def tearDown(self) -> None:
1844 self.server.stop()
1845 if self._temp_config and os.path.exists(self.configFile):
1846 os.remove(self.configFile)
1847 super().tearDown()
1849 def testMakeRepo(self) -> None:
1850 # The base class test assumes that it's using sqlite and that
1851 # the config file is acceptable to sqlite.
1852 raise unittest.SkipTest("Postgres config is not compatible with this test.")
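# The per-test postgres lifecycle used above, in sketch form:
#
#     factory = testing.postgresql.PostgresqlFactory(cache_initialized_db=True)
#     server = factory()                       # setUp: one throwaway server
#     config["registry", "db"] = server.url()  # point the registry at it
#     ...
#     server.stop()                            # tearDown
#     factory.clear_cache()                    # tearDownClass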
1855@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
1856class ClonedPostgresPosixDatastoreButlerTestCase(PostgresPosixDatastoreButlerTestCase, unittest.TestCase):
1857 """Test that Butler with a Postgres registry still works after cloning."""
1859 def create_butler(
1860 self, run: str, storageClass: StorageClass | str, datasetTypeName: str
1861 ) -> tuple[DirectButler, DatasetType]:
1862 butler, datasetType = super().create_butler(run, storageClass, datasetTypeName)
1863 return butler._clone(run=run), datasetType
1866class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
1867 """InMemoryDatastore specialization of a butler"""
1869 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
1870 fullConfigKey = None
1871 useTempRoot = False
1872 validationCanFail = False
1873 datastoreStr = ["datastore='InMemory"]
1874 datastoreName = ["InMemoryDatastore@"]
1875 registryStr = "/gen3.sqlite3"
1877 def testIngest(self) -> None:
1878 pass
1881class ClonedSqliteButlerTestCase(InMemoryDatastoreButlerTestCase, unittest.TestCase):
1882 """Test that a Butler with a Sqlite registry still works after cloning."""
1884 def create_butler(
1885 self, run: str, storageClass: StorageClass | str, datasetTypeName: str
1886 ) -> tuple[DirectButler, DatasetType]:
1887 butler, datasetType = super().create_butler(run, storageClass, datasetTypeName)
1888 return butler._clone(run=run), datasetType
1891class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1892 """PosixDatastore specialization"""
1894 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
1895 fullConfigKey = ".datastore.datastores.1.formatters"
1896 validationCanFail = True
1897 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
1898 datastoreName = [
1899 "InMemoryDatastore@",
1900 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
1901 "SecondDatastore",
1902 ]
1903 registryStr = "/gen3.sqlite3"
1906class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
1907 """Test that a yaml file in one location can refer to a root in another."""
1909 datastoreStr = ["dir1"]
1910 # Disable the makeRepo test since we are deliberately not using
1911 # butler.yaml as the config name.
1912 fullConfigKey = None
1914 def setUp(self) -> None:
1915 self.root = makeTestTempDir(TESTDIR)
1917 # Make a new repository in one place
1918 self.dir1 = os.path.join(self.root, "dir1")
1919 Butler.makeRepo(self.dir1, config=Config(self.configFile))
1921 # Move the yaml file to a different place and add a "root"
1922 self.dir2 = os.path.join(self.root, "dir2")
1923 os.makedirs(self.dir2, exist_ok=True)
1924 configFile1 = os.path.join(self.dir1, "butler.yaml")
1925 config = Config(configFile1)
1926 config["root"] = self.dir1
1927 configFile2 = os.path.join(self.dir2, "butler2.yaml")
1928 config.dumpToUri(configFile2)
1929 os.remove(configFile1)
1930 self.tmpConfigFile = configFile2
1932 def testFileLocations(self) -> None:
1933 self.assertNotEqual(self.dir1, self.dir2)
1934 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
1935 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
1936 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
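# The relocation trick above, in sketch form: a butler config can live
# apart from the repo it describes by carrying an explicit "root" key.
#
#     config = Config(os.path.join(dir1, "butler.yaml"))
#     config["root"] = dir1
#     config.dumpToUri(os.path.join(dir2, "butler2.yaml"))
#     butler = Butler.from_config(os.path.join(dir2, "butler2.yaml"))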
1939class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
1940 """Test that a config file created by makeRepo outside of repo works."""
1942 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1944 def setUp(self) -> None:
1945 self.root = makeTestTempDir(TESTDIR)
1946 self.root2 = makeTestTempDir(TESTDIR)
1948 self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
1949 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1951 def tearDown(self) -> None:
1952 if os.path.exists(self.root2):
1953 shutil.rmtree(self.root2, ignore_errors=True)
1954 super().tearDown()
1956 def testConfigExistence(self) -> None:
1957 c = Config(self.tmpConfigFile)
1958 uri_config = ResourcePath(c["root"])
1959 uri_expected = ResourcePath(self.root, forceDirectory=True)
1960 self.assertEqual(uri_config.geturl(), uri_expected.geturl())
1961 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")
1963 def testPutGet(self) -> None:
1964 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1965 self.runPutGetTest(storageClass, "test_metric")
1968class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
1969 """Test that a config file created by makeRepo outside of repo works."""
1971 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1973 def setUp(self) -> None:
1974 self.root = makeTestTempDir(TESTDIR)
1975 self.root2 = makeTestTempDir(TESTDIR)
1977 self.tmpConfigFile = self.root2
1978 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1980 def testConfigExistence(self) -> None:
1981 # Append the yaml file name, else the Config constructor does not know
1982 # the file type.
1983 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
1984 super().testConfigExistence()
1987class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
1988 """Test that a config file created by makeRepo outside of repo works."""
1990 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1992 def setUp(self) -> None:
1993 self.root = makeTestTempDir(TESTDIR)
1994 self.root2 = makeTestTempDir(TESTDIR)
1996 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl()
1997 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
2000@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
2001class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
2002 """S3Datastore specialization of a butler; an S3 storage Datastore +
2003 a local in-memory SqlRegistry.
2004 """
2006 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
2007 fullConfigKey = None
2008 validationCanFail = True
2010 bucketName = "anybucketname"
2011 """Name of the Bucket that will be used in the tests. The name is read from
2012 the config file used with the tests during set-up.
2013 """
2015 root = "butlerRoot/"
2016 """Root repository directory expected to be used in case useTempRoot=False.
2017 Otherwise the root is set to a 20 characters long randomly generated string
2018 during set-up.
2019 """
2021 datastoreStr = [f"datastore={root}"]
2022 """Contains all expected root locations in a format expected to be
2023 returned by Butler stringification.
2024 """
2026 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
2027 """The expected format of the S3 Datastore string."""
2029 registryStr = "/gen3.sqlite3"
2030 """Expected format of the Registry string."""
2032 mock_aws = mock_aws()
2033 """The mocked s3 interface from moto."""
2035 def genRoot(self) -> str:
2036 """Return a random string of len 20 to serve as a root
2037 name for the temporary bucket repo.
2039 This is equivalent to tempfile.mkdtemp as this is what self.root
2040 becomes when useTempRoot is True.
2041 """
2042 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20))
2043 return rndstr + "/"
2045 def setUp(self) -> None:
2046 config = Config(self.configFile)
2047 uri = ResourcePath(config[".datastore.datastore.root"])
2048 self.bucketName = uri.netloc
2050 # Enable S3 mocking of tests.
2051 self.enterContext(clean_test_environment_for_s3())
2052 self.mock_aws.start()
2054 if self.useTempRoot:
2055 self.root = self.genRoot()
2056 rooturi = f"s3://{self.bucketName}/{self.root}"
2057 config.update({"datastore": {"datastore": {"root": rooturi}}})
2059 # Need a local folder to store the registry database.
2060 self.reg_dir = makeTestTempDir(TESTDIR)
2061 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
2063 # Moto needs to know that we expect the bucket self.bucketName to exist
2064 # (this used to be the class attribute bucketName).
2065 s3 = boto3.resource("s3")
2066 s3.create_bucket(Bucket=self.bucketName)
2068 self.datastoreStr = [f"datastore='{rooturi}'"]
2069 self.datastoreName = [f"FileDatastore@{rooturi}"]
2070 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
2071 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")
2073 def tearDown(self) -> None:
2074 s3 = boto3.resource("s3")
2075 bucket = s3.Bucket(self.bucketName)
2076 try:
2077 bucket.objects.all().delete()
2078 except botocore.exceptions.ClientError as e:
2079 if e.response["Error"]["Code"] == "404":
2080 # the key was not reachable - pass
2081 pass
2082 else:
2083 raise
2085 bucket = s3.Bucket(self.bucketName)
2086 bucket.delete()
2088 # Stop the S3 mock.
2089 self.mock_aws.stop()
2091 if self.reg_dir is not None and os.path.exists(self.reg_dir):
2092 shutil.rmtree(self.reg_dir, ignore_errors=True)
2094 if self.useTempRoot and os.path.exists(self.root):
2095 shutil.rmtree(self.root, ignore_errors=True)
2097 super().tearDown()
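# The moto-backed S3 repo setup above, in sketch form: mock the AWS API,
# create the expected bucket, then make the repo against an s3:// root.
#
#     with clean_test_environment_for_s3():
#         mock = mock_aws()
#         mock.start()
#         boto3.resource("s3").create_bucket(Bucket=bucketName)
#         Butler.makeRepo(f"s3://{bucketName}/{root}", config=config,
#                         forceConfigRoot=False)
#         ...
#         mock.stop()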
2100class PosixDatastoreTransfers(unittest.TestCase):
2101 """Test data transfers between butlers.
2103 Tests cover different dataset ID managers: UUID to UUID and integer to
2104 integer are exercised. UUID to integer is not supported, since we do
2105 not currently want to allow that. Integer to UUID is supported, with
2106 the caveat that a UUID4 will be generated, which will be incorrect for
2107 raw dataset types; the test ignores that.
2108 """
2110 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
2111 storageClassFactory: StorageClassFactory
2113 @classmethod
2114 def setUpClass(cls) -> None:
2115 cls.storageClassFactory = StorageClassFactory()
2116 cls.storageClassFactory.addFromConfig(cls.configFile)
2118 def setUp(self) -> None:
2119 self.root = makeTestTempDir(TESTDIR)
2120 self.config = Config(self.configFile)
2122 def tearDown(self) -> None:
2123 removeTestTempDir(self.root)
2125 def create_butler(self, manager: str, label: str) -> Butler:
2126 config = Config(self.configFile)
2127 config["registry", "managers", "datasets"] = manager
2128 return Butler.from_config(
2129 Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True
2130 )
2132 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None:
2133 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
2134 if manager1 is None:
2135 manager1 = default
2136 if manager2 is None:
2137 manager2 = default
2138 self.source_butler = self.create_butler(manager1, "1")
2139 self.target_butler = self.create_butler(manager2, "2")
2141 def testTransferUuidToUuid(self) -> None:
2142 self.create_butlers()
2143 self.assertButlerTransfers()
2145 def testTransferMissing(self) -> None:
2146 """Test transfers where datastore records are missing.
2148 This is how execution butler works.
2149 """
2150 self.create_butlers()
2152 # Configure the source butler to allow trust.
2153 self.source_butler._datastore._set_trust_mode(True)
2155 self.assertButlerTransfers(purge=True)
2157 def testTransferMissingDisassembly(self) -> None:
2158 """Test transfers where datastore records are missing.
2160 This is how execution butler works.
2161 """
2162 self.create_butlers()
2164 # Configure the source butler to allow trust.
2165 self.source_butler._datastore._set_trust_mode(True)
2167 # Test disassembly.
2168 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite")
2170 def testAbsoluteURITransferDirect(self) -> None:
2171 """Test transfer using an absolute URI."""
2172 self._absolute_transfer("auto")
2174 def testAbsoluteURITransferCopy(self) -> None:
2175 """Test transfer using an absolute URI."""
2176 self._absolute_transfer("copy")
2178 def _absolute_transfer(self, transfer: str) -> None:
2179 self.create_butlers()
2181 storageClassName = "StructuredData"
2182 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
2183 datasetTypeName = "random_data"
2184 run = "run1"
2185 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
2187 dimensions = self.source_butler.dimensions.conform(())
2188 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2189 self.source_butler.registry.registerDatasetType(datasetType)
2191 metrics = makeExampleMetrics()
2192 with ResourcePath.temporary_uri(suffix=".json") as temp:
2193 dataId = DataCoordinate.make_empty(self.source_butler.dimensions)
2194 source_refs = [DatasetRef(datasetType, dataId, run=run)]
2195 temp.write(json.dumps(metrics.exportAsDict()).encode())
2196 dataset = FileDataset(path=temp, refs=source_refs)
2197 self.source_butler.ingest(dataset, transfer="direct")
2199 self.target_butler.transfer_from(
2200 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer
2201 )
2203 uri = self.target_butler.getURI(dataset.refs[0])
2204 if transfer == "auto":
2205 self.assertEqual(uri, temp)
2206 else:
2207 self.assertNotEqual(uri, temp)
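# Note on the "direct" ingest above (a sketch of what the assertions
# show): the file is left at its absolute URI rather than copied into
# the datastore root, so a subsequent transfer_from with transfer="auto"
# keeps pointing at the original location, while transfer="copy"
# materializes a new artifact in the target datastore.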
2209 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None:
2210 """Test that a run can be transferred to another butler."""
2211 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
2212 datasetTypeName = "random_data"
2214 # The test will create 3 collections, and we will want to transfer
2215 # two of those three.
2216 runs = ["run1", "run2", "other"]
2218 # Also want to use two different dataset types to ensure that
2219 # grouping works.
2220 datasetTypeNames = ["random_data", "random_data_2"]
2222 # Create the run collections in the source butler.
2223 for run in runs:
2224 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
2226 # Create dimensions in source butler.
2227 n_exposures = 30
2228 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
2229 self.source_butler.registry.insertDimensionData(
2230 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
2231 )
2232 self.source_butler.registry.insertDimensionData(
2233 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
2234 )
2236 for i in range(n_exposures):
2237 self.source_butler.registry.insertDimensionData(
2238 "exposure",
2239 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"},
2240 )
2242 # Create dataset types in the source butler.
2243 dimensions = self.source_butler.dimensions.conform(["instrument", "exposure"])
2244 for datasetTypeName in datasetTypeNames:
2245 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2246 self.source_butler.registry.registerDatasetType(datasetType)
2248 # Write a dataset to an unrelated run -- this will ensure that
2249 # we are rewriting integer dataset ids in the target if necessary.
2250 # Will not be relevant for UUID.
2251 run = "distraction"
2252 butler = Butler.from_config(butler=self.source_butler, run=run)
2253 butler.put(
2254 makeExampleMetrics(),
2255 datasetTypeName,
2256 exposure=1,
2257 instrument="DummyCamComp",
2258 physical_filter="d-r",
2259 )
2261 # Write some example metrics to the source
2262 butler = Butler.from_config(butler=self.source_butler)
2264 # Set of DatasetRefs that should be in the list of refs to transfer
2265 # but which will not be transferred.
2266 deleted: set[DatasetRef] = set()
2268 n_expected = 20 # Number of datasets expected to be transferred
2269 source_refs = []
2270 for i in range(n_exposures):
2271 # Put a third of the datasets into each collection; only retain
2272 # two thirds.
2273 index = i % 3
2274 run = runs[index]
2275 datasetTypeName = datasetTypeNames[i % 2]
2277 metric = MetricsExample(
2278 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)]
2279 )
2280 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"}
2281 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)
2283 # Remove the datastore record using low-level API, but only
2284 # for a specific index.
2285 if purge and index == 1:
2286 # For one of these delete the file as well.
2287 # This allows the "missing" code to filter the
2288 # file out.
2289 # Access the individual datastores.
2290 datastores = []
2291 if hasattr(butler._datastore, "datastores"):
2292 datastores.extend(butler._datastore.datastores)
2293 else:
2294 datastores.append(butler._datastore)
2296 if not deleted:
2297 # For a chained datastore we need to remove
2298 # files in each chain.
2299 for datastore in datastores:
2300 # The file might not be known to the datastore
2301 # if constraints are used.
2302 try:
2303 primary, uris = datastore.getURIs(ref)
2304 except FileNotFoundError:
2305 continue
2306 if primary and primary.scheme != "mem":
2307 primary.remove()
2308 for uri in uris.values():
2309 if uri.scheme != "mem":
2310 uri.remove()
2311 n_expected -= 1
2312 deleted.add(ref)
2314 # Remove the datastore record.
2315 for datastore in datastores:
2316 if hasattr(datastore, "removeStoredItemInfo"):
2317 datastore.removeStoredItemInfo(ref)
2319 if index < 2:
2320 source_refs.append(ref)
2321 if ref not in deleted:
2322 new_metric = butler.get(ref)
2323 self.assertEqual(new_metric, metric)
2325 # Create some bad dataset types to ensure we check for inconsistent
2326 # definitions.
2327 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList")
2328 for datasetTypeName in datasetTypeNames:
2329 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass)
2330 self.target_butler.registry.registerDatasetType(datasetType)
2331 with self.assertRaises(ConflictingDefinitionError) as cm:
2332 self.target_butler.transfer_from(self.source_butler, source_refs)
2333 self.assertIn("dataset type differs", str(cm.exception))
2335 # And remove the bad definitions.
2336 for datasetTypeName in datasetTypeNames:
2337 self.target_butler.registry.removeDatasetType(datasetTypeName)
2339 # Transfer without creating dataset types should fail.
2340 with self.assertRaises(KeyError):
2341 self.target_butler.transfer_from(self.source_butler, source_refs)
2343 # Transfer without creating dimensions should fail.
2344 with self.assertRaises(ConflictingDefinitionError) as cm:
2345 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True)
2346 self.assertIn("dimension", str(cm.exception))
2348 # The failed transfer above leaves registry in an inconsistent
2349 # state because the run is created but then rolled back without
2350 # the collection cache being cleared. For now force a refresh.
2351 # Can remove with DM-35498.
2352 self.target_butler.registry.refresh()
2354 # Do a dry run -- this should not have any effect on the target butler.
2355 self.target_butler.transfer_from(self.source_butler, source_refs, dry_run=True)
2357 # Transfer the records for one ref to test the alternative API.
2358 with self.assertLogs(logger="lsst", level=logging.DEBUG) as log_cm:
2359 self.target_butler.transfer_dimension_records_from(self.source_butler, [source_refs[0]])
2360 self.assertIn("number of records transferred: 1", ";".join(log_cm.output))
2362 # Now transfer them to the second butler, including dimensions.
2363 with self.assertLogs(logger="lsst", level=logging.DEBUG) as log_cm:
2364 transferred = self.target_butler.transfer_from(
2365 self.source_butler,
2366 source_refs,
2367 register_dataset_types=True,
2368 transfer_dimensions=True,
2369 )
2370 self.assertEqual(len(transferred), n_expected)
2371 log_output = ";".join(log_cm.output)
2373 # A ChainedDatastore will use the in-memory datastore for mexists,
2374 # so we cannot rely on the mexists log message.
2375 self.assertIn("Number of datastore records found in source", log_output)
2376 self.assertIn("Creating output run", log_output)
2378 # Do the transfer twice to ensure that it will do nothing extra.
2379 # Only do this if purge=True because it does not work for int
2380 # dataset_id.
2381 if purge:
2382 # This should not need to register dataset types.
2383 transferred = self.target_butler.transfer_from(self.source_butler, source_refs)
2384 self.assertEqual(len(transferred), n_expected)
2386 # Also do an explicit low-level transfer to trigger some
2387 # edge cases.
2388 with self.assertLogs(level=logging.DEBUG) as log_cm:
2389 self.target_butler._datastore.transfer_from(self.source_butler._datastore, source_refs)
2390 log_output = ";".join(log_cm.output)
2391 self.assertIn("no file artifacts exist", log_output)
2393 with self.assertRaises((TypeError, AttributeError)):
2394 self.target_butler._datastore.transfer_from(self.source_butler, source_refs) # type: ignore
2396 with self.assertRaises(ValueError):
2397 self.target_butler._datastore.transfer_from(
2398 self.source_butler._datastore, source_refs, transfer="split"
2399 )
2401 # Now try to get the same refs from the new butler.
2402 for ref in source_refs:
2403 if ref not in deleted:
2404 new_metric = self.target_butler.get(ref)
2405 old_metric = self.source_butler.get(ref)
2406 self.assertEqual(new_metric, old_metric)
2408 # Now prune run2 collection and create instead a CHAINED collection.
2409 # This should block the transfer.
2410 self.target_butler.removeRuns(["run2"], unstore=True)
2411 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED)
2412 with self.assertRaises(CollectionTypeError):
2413 # Re-importing the run1 datasets can be problematic if they
2414 # use integer IDs so filter those out.
2415 to_transfer = [ref for ref in source_refs if ref.run == "run2"]
2416 self.target_butler.transfer_from(self.source_butler, to_transfer)
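def _sketch_transfer_from(source: Butler, target: Butler, refs: list[DatasetRef]) -> int:
    """Sketch of the minimal cross-butler transfer pattern exercised above
    (helper not used by the tests).
    """
    # dry_run=True exercises the transfer logic without modifying the target.
    target.transfer_from(source, refs, register_dataset_types=True,
                         transfer_dimensions=True, dry_run=True)
    # The real transfer returns the collection of refs actually transferred.
    transferred = target.transfer_from(
        source, refs, register_dataset_types=True, transfer_dimensions=True
    )
    return len(transferred)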
2419class ChainedDatastoreTransfers(PosixDatastoreTransfers):
2420 """Test transfers using a chained datastore."""
2422 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
2425class NullDatastoreTestCase(unittest.TestCase):
2426 """Test that we can fall back to a null datastore."""
2428 # Need a good config to create the repo.
2429 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
2430 storageClassFactory: StorageClassFactory
2432 @classmethod
2433 def setUpClass(cls) -> None:
2434 cls.storageClassFactory = StorageClassFactory()
2435 cls.storageClassFactory.addFromConfig(cls.configFile)
2437 def setUp(self) -> None:
2438 """Create a new butler root for each test."""
2439 self.root = makeTestTempDir(TESTDIR)
2440 Butler.makeRepo(self.root, config=Config(self.configFile))
2442 def tearDown(self) -> None:
2443 removeTestTempDir(self.root)
2445 def test_fallback(self) -> None:
2446 # Read the butler config and mess with the datastore section.
2447 config_path = os.path.join(self.root, "butler.yaml")
2448 bad_config = Config(config_path)
2449 bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore"
2450 bad_config.dumpToUri(config_path)
2452 with self.assertRaises(RuntimeError):
2453 Butler(self.root, without_datastore=False)
2455 with self.assertRaises(RuntimeError):
2456 Butler.from_config(self.root, without_datastore=False)
2458 butler = Butler.from_config(self.root, writeable=True, without_datastore=True)
2459 self.assertIsInstance(butler._datastore, NullDatastore)
2461 # Check that registry is working.
2462 butler.registry.registerRun("MYRUN")
2463 collections = butler.registry.queryCollections(...)
2464 self.assertIn("MYRUN", set(collections))
2466 # Create a ref.
2467 dimensions = butler.dimensions.conform([])
2468 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
2469 datasetTypeName = "metric"
2470 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2471 butler.registry.registerDatasetType(datasetType)
2472 ref = DatasetRef(datasetType, {}, run="MYRUN")
2474 # Check that datastore will complain.
2475 with self.assertRaises(FileNotFoundError):
2476 butler.get(ref)
2477 with self.assertRaises(FileNotFoundError):
2478 butler.getURI(ref)
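# Sketch of the fallback pattern verified above: a registry-only client.
#
#     butler = Butler.from_config(root, writeable=True, without_datastore=True)
#     butler.registry.registerRun("MYRUN")  # registry operations work
#     butler.get(ref)                       # any datastore access raises
#                                           # FileNotFoundError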
2481def setup_module(module: types.ModuleType) -> None:
2482 """Set up the module for pytest."""
2483 clean_environment()
2486if __name__ == "__main__":
2487 clean_environment()
2488 unittest.main()