# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Tests for Butler.
29"""
30from __future__ import annotations
32import gc
33import json
34import logging
35import os
36import pathlib
37import pickle
38import posixpath
39import random
40import shutil
41import string
42import tempfile
43import unittest
44import uuid
45from collections.abc import Mapping
46from typing import TYPE_CHECKING, Any, cast

try:
    import boto3
    import botocore
    from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
    from moto import mock_s3  # type: ignore[import]
except ImportError:
    boto3 = None

    def mock_s3(*args: Any, **kwargs: Any) -> Any:  # type: ignore[no-untyped-def]
        """No-op decorator in case moto mock_s3 cannot be imported."""
        return None
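    # Note: a bare ``@mock_s3`` applied to a test class therefore binds the
    # class name to None when moto is unavailable, so unittest discovery
    # simply skips those S3-backed tests.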


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    ButlerRepoIndex,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetExistence,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    NullDatastore,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    import types

    from lsst.daf.butler import Datastore, DimensionGraph, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def clean_environment() -> None:
    """Remove external environment variables that affect the tests."""
    for k in (
        "DAF_BUTLER_REPOSITORY_INDEX",
        "S3_ENDPOINT_URL",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_SHARED_CREDENTIALS_FILE",
    ):
        os.environ.pop(k, None)


def makeExampleMetrics() -> MetricsExample:
    """Return example dataset suitable for tests."""
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests of ButlerConfig behavior that are not covered by any
    other test cases.
    """

    def testSearchPath(self) -> None:
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests(TestCaseMixin):
    """Helper class for running a suite of put/get tests from different
    butler configurations.
    """

    root: str
    default_run = "ingésτ😺"
    storageClassFactory: StorageClassFactory
    configFile: str
    tmpConfigFile: str
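
    # Subclasses are expected to provide ``configFile`` (and the other class
    # attributes annotated above); ``setUpClass`` loads its storage class
    # definitions from that configuration.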

    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGraph, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(
        self,
        butler: Butler,
        datasetRef: DatasetRef,
        components: tuple[str, ...],
        reference: Any,
        collections: Any = None,
    ) -> None:
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[Butler, DatasetType]:
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run})

        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Butler:
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
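        # Note: uuid.UUID(int=1) is a fixed, deterministic dataset ID, so the
        # resolved-ref variant of this put is reproducible between test runs.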
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]
        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                kwargs: dict[str, Any] = {}
                if not isinstance(args[0], DatasetRef):  # type: ignore
                    kwargs["run"] = this_run
                ref = butler.put(metric, *args, **kwargs)
                self.assertIsInstance(ref, DatasetRef)

                # Test get with the resolved DatasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get by dataset type name and dataId
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a DatasetRef a second time
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                # Can the artifacts themselves be retrieved?
                if not butler._datastore.isEphemeral:
                    root_uri = ResourcePath(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)
                            assert path_in_destination is not None

                            # When the path is not preserved there should not
                            # be any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                kwargs = {"collections": this_run}
                if isinstance(args[0], DatasetRef):
                    kwargs = {}  # Prevent warning from being issued.
                self.assertFalse(butler.exists(*args, **kwargs))
                # get() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty.
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Create DatasetRef for put using default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Check that getDeferred fails with standalone ref.
        with self.assertRaises(LookupError):
            butler.getDeferred(refIn)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        assert metric.data is not None  # for mypy
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises(FileNotFoundError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add the same ref again, so we can check that duplicate put fails.
        ref = butler.put(metric, datasetType, dataId)

        # Repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with a resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # With a resolved ref this write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present.

        return butler

    def testDeferredCollectionPassing(self) -> None:
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises a
        # CollectionError.
        self.assertFalse(butler.exists(datasetType, dataId))
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original run collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True
    validationCanFail: bool
    fullConfigKey: str | None
    registryStr: str | None
    datastoreName: list[str] | None
    datastoreStr: list[str]

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self) -> None:
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler._datastore, butler2._datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"file://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"):
                        Butler("bad_label")
                    with self.assertRaises(FileNotFoundError):
                        # Should ignore aliases.
                        Butler(ResourcePath("label", forceAbsolute=False))
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(
                        Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False)
                    )
                    self.assertIn("not known to", str(cm.exception))
                    # Should report no failure.
                    self.assertEqual(ButlerRepoIndex.get_failure_reason(), "")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with empty configuration.
                butler_index = Config()
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"):
                        Butler("label")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with bad contents.
                with open(temp_file.ospath, "w") as fh:
                    print("'", file=fh)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"):
                        Butler("label")
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())

            with self.assertRaisesRegex(FileNotFoundError, "index file not found"):
                Butler("label")

        # Check that we can create a Butler when the alias file is not found.
        butler = Butler(self.tmpConfigFile, writeable=False)
        self.assertIsInstance(butler, Butler)
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler._datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler._datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)
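
        # MetricsConversion has a different Python type that the original
        # storage class can be converted to, so the override changes the
        # returned type while preserving equality with the stored metric.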

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])
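
        # The coercion works because the registered storage class declares
        # MetricsExample as its Python type, so a compatible dict is
        # converted on put and the native type comes back on get.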

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the compatible dataset type definition, this time
        # passing the DatasetType and data ID rather than a ref. This should
        # be consistent with the ref-based get() behavior and return the
        # Python type of the given DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")
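
        # Each FileDataset pairs one file with the refs it provides; above,
        # every file carries a single ref. The second ingest below instead
        # shares one file between two refs via MultiDetectorFormatter.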

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            # For first ingest use copy.
            butler.ingest(*datasets, transfer="copy", record_validation_info=False)

            # Now try to ingest again in "execution butler" mode where
            # the registry entries exist but the datastore does not have
            # the files. We also need to strip the dimension records to ensure
            # that they will be re-added by the ingest.
            ref = datasets[0].refs[0]
            datasets[0].refs = [
                cast(
                    DatasetRef,
                    butler.registry.findDataset(ref.datasetType, dataId=ref.dataId, collections=ref.run),
                )
                for ref in datasets[0].refs
            ]
            all_refs = []
            for dataset in datasets:
                refs = []
                for ref in dataset.refs:
                    # Create a dict from the dataId to drop the records.
                    new_data_id = {str(k): v for k, v in ref.dataId.items()}
                    new_ref = butler.registry.findDataset(ref.datasetType, new_data_id, collections=ref.run)
                    assert new_ref is not None
                    self.assertFalse(new_ref.dataId.hasRecords())
                    refs.append(new_ref)
                dataset.refs = refs
                all_refs.extend(dataset.refs)
            butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False)

            # Use move mode to test that the file is deleted. Also
            # disable recording of file size.
            butler.ingest(*datasets, transfer="move", record_validation_info=False)

            # Check that every ref now has records.
            for dataset in datasets:
                for ref in dataset.refs:
                    self.assertTrue(ref.dataId.hasRecords())

            # Ensure that the file has disappeared.
            self.assertFalse(tempFile.exists())

            # Check that the datastore recorded no file size.
            # Not all datastores can support this.
            try:
                infos = butler._datastore.getStoredItemsInfo(datasets[0].refs[0])  # type: ignore[attr-defined]
                self.assertEqual(infos[0].file_size, -1)
            except AttributeError:
                pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.exists(datasetTypeName, dataId1))
        self.assertTrue(butler.exists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets
        datasets = []
        butler.ingest(*datasets)

    def testPickle(self) -> None:
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry, entries are not created
        # for components, but querying can still return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get with the returned DatasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get by dataset type name and dataId
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)

    def testMakeRepo(self) -> None:
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler._datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self) -> None:
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool:
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies the actual physical existence of the
        files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")
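
        # The ":?" suffix marks a template field as optional, so the
        # misspelled "visit.namex" above is simply dropped from the formatted
        # path (with an INFO log message); the required form below raises
        # KeyError instead.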

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames.
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass: StorageClass) -> None:
        """Test exporting and importing.

        This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")

        # Test that we must have a file extension.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump", directory=".") as export:
                pass

        # Test that an unknown format is not allowed.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump.fits", directory=".") as export:
                pass

        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler.
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public API.
                with open(exportFile) as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.exists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # put a dataset in each
        metric = makeExampleMetrics()
        dimensions = butler.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1)
        uri2 = butler.getURI(ref2)

        with self.assertRaises(OrphanedRecordError):
            butler.registry.removeDatasetType(datasetType.name)

        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
1423 # Should be nothing in registry for either one, and datastore should
1424 # not think either exists.
1425 with self.assertRaises(MissingCollectionError):
1426 butler.registry.getCollectionType(run1)
1427 with self.assertRaises(MissingCollectionError):
1428 butler.registry.getCollectionType(run2)
1429 self.assertFalse(butler.stored(ref1))
1430 self.assertFalse(butler.stored(ref2))
1431 # The ref we unstored should be gone according to the URI, but the
1432 # one we forgot should still be around.
1433 self.assertFalse(uri1.exists())
1434 self.assertTrue(uri2.exists())
1436 # Now that the collections have been pruned, we can remove the
1437 # dataset type.
1438 butler.registry.removeDatasetType(datasetType.name)
1440 with self.assertLogs("lsst.daf.butler.registries", "INFO") as cm:
1441 butler.registry.removeDatasetType(("test*", "test*"))
1442 self.assertIn("not defined", "\n".join(cm.output))
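# For reference, the two removal modes exercised above are, in sketch form:
#
#     butler.removeRuns([run1], unstore=True)   # also delete file artifacts
#     butler.removeRuns([run2], unstore=False)  # forget datasets, keep artifacts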
1445class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1446 """PosixDatastore specialization of a butler"""
1448 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1449 fullConfigKey: str | None = ".datastore.formatters"
1450 validationCanFail = True
1451 datastoreStr = ["/tmp"]
1452 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1453 registryStr = "/gen3.sqlite3"
1455 def testPathConstructor(self) -> None:
1456 """Independent test of constructor using PathLike."""
1457 butler = Butler(self.tmpConfigFile, run=self.default_run)
1458 self.assertIsInstance(butler, Butler)
1460 # And again with a Path object with the butler yaml
1461 path = pathlib.Path(self.tmpConfigFile)
1462 butler = Butler(path, writeable=False)
1463 self.assertIsInstance(butler, Butler)
1465 # And again with a Path object without the butler yaml
1466 # (making sure we skip it if the tmp config doesn't end
1467 # in butler.yaml -- which is the case for a subclass)
1468 if self.tmpConfigFile.endswith("butler.yaml"):
1469 path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
1470 butler = Butler(path, writeable=False)
1471 self.assertIsInstance(butler, Butler)
1473 def testExportTransferCopy(self) -> None:
1474 """Test local export using all transfer modes"""
1475 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1476 exportButler = self.runPutGetTest(storageClass, "test_metric")
1477 # Test that the repo actually has at least one dataset.
1478 datasets = list(exportButler.registry.queryDatasets(..., collections=...))
1479 self.assertGreater(len(datasets), 0)
1480 uris = [exportButler.getURI(d) for d in datasets]
1481 assert isinstance(exportButler._datastore, FileDatastore)
1482 datastoreRoot = exportButler.get_datastore_roots()[exportButler.get_datastore_names()[0]]
1484 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]
1486 for path in pathsInStore:
1487 # Assume local file system
1488 assert path is not None
1489 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}")
1491 for transfer in ("copy", "link", "symlink", "relsymlink"):
1492 with safeTestTempDir(TESTDIR) as exportDir:
1493 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export:
1494 export.saveDatasets(datasets)
1495 for path in pathsInStore:
1496 assert path is not None
1497 self.assertTrue(
1498 self.checkFileExists(exportDir, path),
1499 f"Check that mode {transfer} exported files",
1500 )
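# Note: of the four modes above, "link", "symlink" and "relsymlink" are
# assumed to require that the datastore root and the export directory share
# a local POSIX filesystem; "copy" is the only one expected to work across
# arbitrary stores.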
1502 def testPruneDatasets(self) -> None:
1503 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1504 butler = Butler(self.tmpConfigFile, writeable=True)
1505 assert isinstance(butler._datastore, FileDatastore)
1506 # Load registry data with dimensions to hang datasets off of.
1507 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry"))
1508 butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
1509 # Add some RUN-type collections.
1510 run1 = "run1"
1511 butler.registry.registerRun(run1)
1512 run2 = "run2"
1513 butler.registry.registerRun(run2)
1514 # Put some datasets. ref1 and ref2 have the same data ID, and are in
1515 # different runs. ref3 has a different data ID.
1516 metric = makeExampleMetrics()
1517 dimensions = butler.dimensions.extract(["instrument", "physical_filter"])
1518 datasetType = self.addDatasetType(
1519 "prune_collections_test_dataset", dimensions, storageClass, butler.registry
1520 )
1521 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
1522 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
1523 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
1525 many_stored = butler.stored_many([ref1, ref2, ref3])
1526 for ref, stored in many_stored.items():
1527 self.assertTrue(stored, f"Ref {ref} should be stored")
1529 many_exists = butler._exists_many([ref1, ref2, ref3])
1530 for ref, exists in many_exists.items():
1531 self.assertTrue(exists, f"Checking ref {ref} exists.")
1532 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored")
1534 # Simple prune.
1535 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
1536 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1))
1538 many_stored = butler.stored_many([ref1, ref2, ref3])
1539 for ref, stored in many_stored.items():
1540 self.assertFalse(stored, f"Ref {ref} should not be stored")
1542 many_exists = butler._exists_many([ref1, ref2, ref3])
1543 for ref, exists in many_exists.items():
1544 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored")
1546 # Put data back.
1547 ref1_new = butler.put(metric, ref1)
1548 self.assertEqual(ref1_new, ref1) # Reuses original ID.
1549 ref2 = butler.put(metric, ref2)
1551 many_stored = butler.stored_many([ref1, ref2, ref3])
1552 self.assertTrue(many_stored[ref1])
1553 self.assertTrue(many_stored[ref2])
1554 self.assertFalse(many_stored[ref3])
1556 ref3 = butler.put(metric, ref3)
1558 many_exists = butler._exists_many([ref1, ref2, ref3])
1559 for ref, exists in many_exists.items():
1560 self.assertTrue(exists, f"Ref {ref} should be stored")
1562 # Clear out the datasets from registry and start again.
1563 refs = [ref1, ref2, ref3]
1564 butler.pruneDatasets(refs, purge=True, unstore=True)
1565 for ref in refs:
1566 butler.put(metric, ref)
1568 # Confirm we can retrieve deferred.
1569 dref1 = butler.getDeferred(ref1) # known and exists
1570 metric1 = dref1.get()
1571 self.assertEqual(metric1, metric)
1573 # Test different forms of file availability.
1574 # Need to be in a state where:
1575 # - one ref just has registry record.
1576 # - one ref has a missing file but a datastore record.
1577 # - one ref has a missing datastore record but file is there.
1578 # - one ref does not exist anywhere.
1579 # Do not need to test a ref that has everything since that is tested
1580 # above.
1581 ref0 = DatasetRef(
1582 datasetType,
1583 DataCoordinate.standardize(
1584 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions
1585 ),
1586 run=run1,
1587 )
1589 # Delete from datastore and retain in Registry.
1590 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False)
1592 # File has been removed.
1593 uri2 = butler.getURI(ref2)
1594 uri2.remove()
1596 # Datastore has lost track.
1597 butler._datastore.forget([ref3])
1599 # First test with a standard butler.
1600 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
1601 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1602 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED)
1603 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
1604 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED)
1606 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False)
1607 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1608 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED)
1609 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN)
1610 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED)
1611 self.assertTrue(exists_many[ref2])
1613 # Check that per-ref query gives the same answer as many query.
1614 for ref, exists in exists_many.items():
1615 self.assertEqual(butler.exists(ref, full_check=False), exists)
1617 # getDeferred checks for existence before it allows the dataset to be
1618 # retrieved.
1619 with self.assertRaises(LookupError):
1620 butler.getDeferred(ref3) # not known, file exists
1621 dref2 = butler.getDeferred(ref2) # known but file missing
1622 with self.assertRaises(FileNotFoundError):
1623 dref2.get()
1625 # Test again with a trusting butler.
1626 butler._datastore.trustGetRequest = True
1627 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True)
1628 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED)
1629 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED)
1630 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE)
1631 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT)
1633 # When trusting we can get a deferred dataset handle that is not
1634 # known but does exist.
1635 dref3 = butler.getDeferred(ref3)
1636 metric3 = dref3.get()
1637 self.assertEqual(metric3, metric)
1639 # Check that per-ref query gives the same answer as many query.
1640 for ref, exists in exists_many.items():
1641 self.assertEqual(butler.exists(ref, full_check=True), exists)
1643 # Create a ref that surprisingly has the UUID of an existing ref
1644 # but is not the same.
1645 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id)
1646 with self.assertRaises(ValueError):
1647 butler.exists(ref_bad)
1649 # Create a ref that has a compatible storage class.
1650 ref_compat = ref2.overrideStorageClass("StructuredDataDict")
1651 exists = butler.exists(ref_compat)
1652 self.assertEqual(exists, exists_many[ref2])
1654 # Remove everything and start from scratch.
1655 butler._datastore.trustGetRequest = False
1656 butler.pruneDatasets(refs, purge=True, unstore=True)
1657 for ref in refs:
1658 butler.put(metric, ref)
1660 # These tests mess directly with the trash table and can leave the
1661 # datastore in an odd state. Do them at the end.
1662 # Check that in normal mode, deleting the record will lead to
1663 # trash not touching the file.
1664 uri1 = butler.getURI(ref1)
1665 butler._datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table
1666 butler._datastore.forget([ref1])
1667 butler._datastore.trash(ref1)
1668 butler._datastore.emptyTrash()
1669 self.assertTrue(uri1.exists())
1670 uri1.remove() # Clean it up.
1672 # Simulate execution butler setup by deleting the datastore
1673 # record but keeping the file around and trusting.
1674 butler._datastore.trustGetRequest = True
1675 uris = butler.get_many_uris([ref2, ref3])
1676 uri2 = uris[ref2].primaryURI
1677 uri3 = uris[ref3].primaryURI
1678 self.assertTrue(uri2.exists())
1679 self.assertTrue(uri3.exists())
1681 # Remove the datastore record.
1682 butler._datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table
1683 butler._datastore.forget([ref2])
1684 self.assertTrue(uri2.exists())
1685 butler._datastore.trash([ref2, ref3])
1686 # Immediate removal of the ref2 file.
1687 self.assertFalse(uri2.exists())
1688 # But ref3 has to wait for the trash to be emptied.
1689 self.assertTrue(uri3.exists())
1690 butler._datastore.emptyTrash()
1691 self.assertFalse(uri3.exists())
1693 # Clear out the datasets from registry.
1694 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
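# The trash mechanics above are two-phase; in sketch form, the low-level
# calls used are:
#
#     butler._datastore.trash(ref)     # move the record to the trash table
#     butler._datastore.emptyTrash()   # delete the trashed file artifacts
#
# When the datastore record is already gone and trustGetRequest is set,
# trash() removes the artifact immediately, as checked for ref2 above.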
1696 def testPytypeCoercion(self) -> None:
1697 """Test python type coercion on Butler.get and put."""
1698 # Store some data with the normal example storage class.
1699 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1700 datasetTypeName = "test_metric"
1701 butler = self.runPutGetTest(storageClass, datasetTypeName)
1703 dataId = {"instrument": "DummyCamComp", "visit": 423}
1704 metric = butler.get(datasetTypeName, dataId=dataId)
1705 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample")
1707 datasetType_ori = butler.registry.getDatasetType(datasetTypeName)
1708 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents")
1710 # Now we need to hack the registry dataset type definition;
1711 # there is no public API for this.
1712 assert isinstance(butler._registry, SqlRegistry)
1713 manager = butler._registry._managers.datasets
1714 assert hasattr(manager, "_db") and hasattr(manager, "_static")
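# Note: Database.update maps the keys of its ``where`` argument (column
# names) to keys in the row dicts, which is presumably why the dataset
# type name itself appears as a dict key in the row below.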
1715 manager._db.update(
1716 manager._static.dataset_type,
1717 {"name": datasetTypeName},
1718 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"},
1719 )
1721 # Force reset of dataset type cache
1722 butler.registry.refresh()
1724 datasetType_new = butler.registry.getDatasetType(datasetTypeName)
1725 self.assertEqual(datasetType_new.name, datasetType_ori.name)
1726 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel")
1728 metric_model = butler.get(datasetTypeName, dataId=dataId)
1729 self.assertNotEqual(type(metric_model), type(metric))
1730 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel")
1732 # Put the model and read it back to show that everything now
1733 # works as normal.
1734 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424)
1735 metric_model_new = butler.get(metric_ref)
1736 self.assertEqual(metric_model_new, metric_model)
1738 # Hack the storage class again to something that will fail on the
1739 # get with no conversion class.
1740 manager._db.update(
1741 manager._static.dataset_type,
1742 {"name": datasetTypeName},
1743 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"},
1744 )
1745 butler.registry.refresh()
1747 with self.assertRaises(ValueError):
1748 butler.get(datasetTypeName, dataId=dataId)
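# A compatible storage class can also be requested per-ref without hacking
# the registry, provided the two storage classes are declared convertible;
# a sketch using the API exercised earlier in this file:
#
#     model_ref = metric_ref.overrideStorageClass("StructuredDataNoComponentsModel")
#     metric_model = butler.get(model_ref)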
1751@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
1752class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1753 """PosixDatastore specialization of a butler using Postgres"""
1755 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1756 fullConfigKey = ".datastore.formatters"
1757 validationCanFail = True
1758 datastoreStr = ["/tmp"]
1759 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
1760 registryStr = "PostgreSQL@test"
1761 postgresql: Any
1763 @staticmethod
1764 def _handler(postgresql: Any) -> None:
1765 engine = sqlalchemy.engine.create_engine(postgresql.url())
1766 with engine.begin() as connection:
1767 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))
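# The btree_gist extension is assumed to be required by the registry schema
# on PostgreSQL (e.g. for exclusion constraints involving timespans), so it
# is installed once in the cached template database before any test runs.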
1769 @classmethod
1770 def setUpClass(cls) -> None:
1771 # Create the postgres test server.
1772 cls.postgresql = testing.postgresql.PostgresqlFactory(
1773 cache_initialized_db=True, on_initialized=cls._handler
1774 )
1775 super().setUpClass()
1777 @classmethod
1778 def tearDownClass(cls) -> None:
1779 # Clean up any lingering SQLAlchemy engines/connections
1780 # so they're closed before we shut down the server.
1781 gc.collect()
1782 cls.postgresql.clear_cache()
1783 super().tearDownClass()
1785 def setUp(self) -> None:
1786 self.server = self.postgresql()
1788 # Need to add a registry section to the config.
1789 self._temp_config = False
1790 config = Config(self.configFile)
1791 config["registry", "db"] = self.server.url()
1792 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh:
1793 config.dump(fh)
1794 self.configFile = fh.name
1795 self._temp_config = True
1796 super().setUp()
1798 def tearDown(self) -> None:
1799 self.server.stop()
1800 if self._temp_config and os.path.exists(self.configFile):
1801 os.remove(self.configFile)
1802 super().tearDown()
1804 def testMakeRepo(self) -> None:
1805 # The base class test assumes that it is using SQLite and that
1806 # the config file is acceptable to SQLite.
1807 raise unittest.SkipTest("Postgres config is not compatible with this test.")
1810class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
1811 """InMemoryDatastore specialization of a butler"""
1813 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
1814 fullConfigKey = None
1815 useTempRoot = False
1816 validationCanFail = False
1817 datastoreStr = ["datastore='InMemory"]
1818 datastoreName = ["InMemoryDatastore@"]
1819 registryStr = "/gen3.sqlite3"
1821 def testIngest(self) -> None:
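# File ingest does not apply to an in-memory datastore, so the
# inherited test is disabled with a no-op override.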
1822 pass
1825class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1826 """PosixDatastore specialization"""
1828 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
1829 fullConfigKey = ".datastore.datastores.1.formatters"
1830 validationCanFail = True
1831 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
1832 datastoreName = [
1833 "InMemoryDatastore@",
1834 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
1835 "SecondDatastore",
1836 ]
1837 registryStr = "/gen3.sqlite3"
1840class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
1841 """Test that a yaml file in one location can refer to a root in another."""
1843 datastoreStr = ["dir1"]
1844 # Disable the makeRepo test since we are deliberately not using
1845 # butler.yaml as the config name.
1846 fullConfigKey = None
1848 def setUp(self) -> None:
1849 self.root = makeTestTempDir(TESTDIR)
1851 # Make a new repository in one place
1852 self.dir1 = os.path.join(self.root, "dir1")
1853 Butler.makeRepo(self.dir1, config=Config(self.configFile))
1855 # Move the yaml file to a different place and add a "root"
1856 self.dir2 = os.path.join(self.root, "dir2")
1857 os.makedirs(self.dir2, exist_ok=True)
1858 configFile1 = os.path.join(self.dir1, "butler.yaml")
1859 config = Config(configFile1)
1860 config["root"] = self.dir1
1861 configFile2 = os.path.join(self.dir2, "butler2.yaml")
1862 config.dumpToUri(configFile2)
1863 os.remove(configFile1)
1864 self.tmpConfigFile = configFile2
1866 def testFileLocations(self) -> None:
1867 self.assertNotEqual(self.dir1, self.dir2)
1868 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
1869 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
1870 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))
1873class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
1874 """Test that a config file created by makeRepo outside of repo works."""
1876 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1878 def setUp(self) -> None:
1879 self.root = makeTestTempDir(TESTDIR)
1880 self.root2 = makeTestTempDir(TESTDIR)
1882 self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
1883 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1885 def tearDown(self) -> None:
1886 if os.path.exists(self.root2):
1887 shutil.rmtree(self.root2, ignore_errors=True)
1888 super().tearDown()
1890 def testConfigExistence(self) -> None:
1891 c = Config(self.tmpConfigFile)
1892 uri_config = ResourcePath(c["root"])
1893 uri_expected = ResourcePath(self.root, forceDirectory=True)
1894 self.assertEqual(uri_config.geturl(), uri_expected.geturl())
1895 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")
1897 def testPutGet(self) -> None:
1898 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
1899 self.runPutGetTest(storageClass, "test_metric")
1902class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
1903 """Test that a config file created by makeRepo outside of repo works."""
1905 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1907 def setUp(self) -> None:
1908 self.root = makeTestTempDir(TESTDIR)
1909 self.root2 = makeTestTempDir(TESTDIR)
1911 self.tmpConfigFile = self.root2
1912 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1914 def testConfigExistence(self) -> None:
1915 # Append the yaml file name, otherwise the Config constructor does
1916 # not know the file type.
1917 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
1918 super().testConfigExistence()
1921class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
1922 """Test that a config file created by makeRepo outside of repo works."""
1924 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1926 def setUp(self) -> None:
1927 self.root = makeTestTempDir(TESTDIR)
1928 self.root2 = makeTestTempDir(TESTDIR)
1930 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl()
1931 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)
1934@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
1935class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1936 """S3Datastore specialization of a butler; an S3 storage Datastore +
1937 a local in-memory SqlRegistry.
1938 """
1940 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
1941 fullConfigKey = None
1942 validationCanFail = True
1944 bucketName = "anybucketname"
1945 """Name of the Bucket that will be used in the tests. The name is read from
1946 the config file used with the tests during set-up.
1947 """
1949 root = "butlerRoot/"
1950 """Root repository directory expected to be used in case useTempRoot=False.
1951 Otherwise the root is set to a randomly generated 20-character string
1952 during set-up.
1953 """
1955 datastoreStr = [f"datastore={root}"]
1956 """Contains all expected root locations in a format expected to be
1957 returned by Butler stringification.
1958 """
1960 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
1961 """The expected format of the S3 Datastore string."""
1963 registryStr = "/gen3.sqlite3"
1964 """Expected format of the Registry string."""
1966 mock_s3 = mock_s3()
1967 """The mocked s3 interface from moto."""
1969 def genRoot(self) -> str:
1970 """Return a random string of len 20 to serve as a root
1971 name for the temporary bucket repo.
1973 This is equivalent to tempfile.mkdtemp as this is what self.root
1974 becomes when useTempRoot is True.
1975 """
1976 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20))
1977 return rndstr + "/"
1979 def setUp(self) -> None:
1980 config = Config(self.configFile)
1981 uri = ResourcePath(config[".datastore.datastore.root"])
1982 self.bucketName = uri.netloc
1984 # Enable S3 mocking of tests.
1985 self.mock_s3.start()
1987 # Set up some fake credentials if they do not exist.
1988 self.usingDummyCredentials = setAwsEnvCredentials()
1990 if self.useTempRoot:
1991 self.root = self.genRoot()
1992 rooturi = f"s3://{self.bucketName}/{self.root}"
1993 config.update({"datastore": {"datastore": {"root": rooturi}}})
1995 # Need a local folder to store the registry database.
1996 self.reg_dir = makeTestTempDir(TESTDIR)
1997 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
1999 # Moto needs to know that we expect the bucket to exist
2000 # (this used to be the class attribute bucketName).
2001 s3 = boto3.resource("s3")
2002 s3.create_bucket(Bucket=self.bucketName)
2004 self.datastoreStr = [f"datastore='{rooturi}'"]
2005 self.datastoreName = [f"FileDatastore@{rooturi}"]
2006 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
2007 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")
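# Note the ordering above: the moto mock must be started and the bucket
# created before Butler.makeRepo touches any s3:// URI; otherwise boto3
# would try to contact real AWS endpoints.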
2009 def tearDown(self) -> None:
2010 s3 = boto3.resource("s3")
2011 bucket = s3.Bucket(self.bucketName)
2012 try:
2013 bucket.objects.all().delete()
2014 except botocore.exceptions.ClientError as e:
2015 if e.response["Error"]["Code"] == "404":
2016 # The key was not reachable; nothing to delete.
2017 pass
2018 else:
2019 raise
2021 bucket = s3.Bucket(self.bucketName)
2022 bucket.delete()
2024 # Stop the S3 mock.
2025 self.mock_s3.stop()
2027 # Unset any dummy credentials that may have been set.
2028 if self.usingDummyCredentials:
2029 unsetAwsEnvCredentials()
2031 if self.reg_dir is not None and os.path.exists(self.reg_dir):
2032 shutil.rmtree(self.reg_dir, ignore_errors=True)
2034 if self.useTempRoot and os.path.exists(self.root):
2035 shutil.rmtree(self.root, ignore_errors=True)
2037 super().tearDown()
2040class PosixDatastoreTransfers(unittest.TestCase):
2041 """Test data transfers between butlers.
2043 Tests cover different dataset ID managers: UUID to UUID and integer to
2044 integer transfers are tested. UUID to integer is not supported, since we
2045 do not currently want to allow it. Integer to UUID is supported, with the
2046 caveat that a UUID4 will be generated, which is incorrect for raw dataset
2047 types; the test ignores that.
2048 """
2050 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
2051 storageClassFactory: StorageClassFactory
2053 @classmethod
2054 def setUpClass(cls) -> None:
2055 cls.storageClassFactory = StorageClassFactory()
2056 cls.storageClassFactory.addFromConfig(cls.configFile)
2058 def setUp(self) -> None:
2059 self.root = makeTestTempDir(TESTDIR)
2060 self.config = Config(self.configFile)
2062 def tearDown(self) -> None:
2063 removeTestTempDir(self.root)
2065 def create_butler(self, manager: str, label: str) -> Butler:
2066 config = Config(self.configFile)
2067 config["registry", "managers", "datasets"] = manager
2068 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True)
2070 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None:
2071 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
2072 if manager1 is None:
2073 manager1 = default
2074 if manager2 is None:
2075 manager2 = default
2076 self.source_butler = self.create_butler(manager1, "1")
2077 self.target_butler = self.create_butler(manager2, "2")
2079 def testTransferUuidToUuid(self) -> None:
2080 self.create_butlers()
2081 self.assertButlerTransfers()
2083 def _enable_trust(self, datastore: Datastore) -> None:
2084 datastores = getattr(datastore, "datastores", [datastore])
2085 for this_datastore in datastores:
2086 if hasattr(this_datastore, "trustGetRequest"):
2087 this_datastore.trustGetRequest = True
2089 def testTransferMissing(self) -> None:
2090 """Test transfers where datastore records are missing.
2092 This is how execution butler works.
2093 """
2094 self.create_butlers()
2096 # Configure the source butler to allow trust.
2097 self._enable_trust(self.source_butler._datastore)
2099 self.assertButlerTransfers(purge=True)
2101 def testTransferMissingDisassembly(self) -> None:
2102 """Test transfers where datastore records are missing.
2104 This is how execution butler works.
2105 """
2106 self.create_butlers()
2108 # Configure the source butler to allow trust.
2109 self._enable_trust(self.source_butler._datastore)
2111 # Test disassembly.
2112 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite")
2114 def testAbsoluteURITransferDirect(self) -> None:
2115 """Test transfer using an absolute URI."""
2116 self._absolute_transfer("auto")
2118 def testAbsoluteURITransferCopy(self) -> None:
2119 """Test transfer using an absolute URI."""
2120 self._absolute_transfer("copy")
2122 def _absolute_transfer(self, transfer: str) -> None:
2123 self.create_butlers()
2125 storageClassName = "StructuredData"
2126 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
2127 datasetTypeName = "random_data"
2128 run = "run1"
2129 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
2131 dimensions = self.source_butler.dimensions.extract(())
2132 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2133 self.source_butler.registry.registerDatasetType(datasetType)
2135 metrics = makeExampleMetrics()
2136 with ResourcePath.temporary_uri(suffix=".json") as temp:
2137 dataId = DataCoordinate.makeEmpty(self.source_butler.dimensions)
2138 source_refs = [DatasetRef(datasetType, dataId, run=run)]
2139 temp.write(json.dumps(metrics.exportAsDict()).encode())
2140 dataset = FileDataset(path=temp, refs=source_refs)
2141 self.source_butler.ingest(dataset, transfer="direct")
2143 self.target_butler.transfer_from(
2144 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer
2145 )
2147 uri = self.target_butler.getURI(dataset.refs[0])
2148 if transfer == "auto":
2149 self.assertEqual(uri, temp)
2150 else:
2151 self.assertNotEqual(uri, temp)
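# With "direct" ingest the source butler records the absolute URI of the
# artifact, so an "auto" transfer leaves the target pointing at the original
# location, while "copy" materializes a new artifact; that is what the
# assertions above distinguish.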
2153 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None:
2154 """Test that a run can be transferred to another butler."""
2155 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
2156 datasetTypeName = "random_data"
2158 # The test will create 3 collections, and we will want to transfer
2159 # two of the three.
2160 runs = ["run1", "run2", "other"]
2162 # We also want to use two different dataset types to ensure that
2163 # grouping works.
2164 datasetTypeNames = ["random_data", "random_data_2"]
2166 # Create the run collections in the source butler.
2167 for run in runs:
2168 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
2170 # Create dimensions in source butler.
2171 n_exposures = 30
2172 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
2173 self.source_butler.registry.insertDimensionData(
2174 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
2175 )
2176 self.source_butler.registry.insertDimensionData(
2177 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
2178 )
2180 for i in range(n_exposures):
2181 self.source_butler.registry.insertDimensionData(
2182 "exposure",
2183 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"},
2184 )
2186 # Create dataset types in the source butler.
2187 dimensions = self.source_butler.dimensions.extract(["instrument", "exposure"])
2188 for datasetTypeName in datasetTypeNames:
2189 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2190 self.source_butler.registry.registerDatasetType(datasetType)
2192 # Write a dataset to an unrelated run -- this will ensure that
2193 # we are rewriting integer dataset IDs in the target if necessary.
2194 # This is not relevant for UUIDs.
2195 run = "distraction"
2196 butler = Butler(butler=self.source_butler, run=run)
2197 butler.put(
2198 makeExampleMetrics(),
2199 datasetTypeName,
2200 exposure=1,
2201 instrument="DummyCamComp",
2202 physical_filter="d-r",
2203 )
2205 # Write some example metrics to the source butler.
2206 butler = Butler(butler=self.source_butler)
2208 # Set of DatasetRefs that should be in the list of refs to transfer
2209 # but which will not be transferred.
2210 deleted: set[DatasetRef] = set()
2212 n_expected = 20 # Number of datasets expected to be transferred
2213 source_refs = []
2214 for i in range(n_exposures):
2215 # Put a third of the datasets into each collection; only retain
2216 # two thirds.
2217 index = i % 3
2218 run = runs[index]
2219 datasetTypeName = datasetTypeNames[i % 2]
2221 metric = MetricsExample(
2222 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)]
2223 )
2224 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"}
2225 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)
2227 # Remove the datastore record using the low-level API, but only
2228 # for a specific index.
2229 if purge and index == 1:
2230 # For one of these delete the file as well.
2231 # This allows the "missing" code to filter the
2232 # file out.
2233 # Access the individual datastores.
2234 datastores = []
2235 if hasattr(butler._datastore, "datastores"):
2236 datastores.extend(butler._datastore.datastores)
2237 else:
2238 datastores.append(butler._datastore)
2240 if not deleted:
2241 # For a chained datastore we need to remove
2242 # files in each child datastore.
2243 for datastore in datastores:
2244 # The file might not be known to the datastore
2245 # if constraints are used.
2246 try:
2247 primary, uris = datastore.getURIs(ref)
2248 except FileNotFoundError:
2249 continue
2250 if primary and primary.scheme != "mem":
2251 primary.remove()
2252 for uri in uris.values():
2253 if uri.scheme != "mem":
2254 uri.remove()
2255 n_expected -= 1
2256 deleted.add(ref)
2258 # Remove the datastore record.
2259 for datastore in datastores:
2260 if hasattr(datastore, "removeStoredItemInfo"):
2261 datastore.removeStoredItemInfo(ref)
2263 if index < 2:
2264 source_refs.append(ref)
2265 if ref not in deleted:
2266 new_metric = butler.get(ref)
2267 self.assertEqual(new_metric, metric)
2269 # Create some bad dataset types to ensure we check for inconsistent
2270 # definitions.
2271 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList")
2272 for datasetTypeName in datasetTypeNames:
2273 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass)
2274 self.target_butler.registry.registerDatasetType(datasetType)
2275 with self.assertRaises(ConflictingDefinitionError) as cm:
2276 self.target_butler.transfer_from(self.source_butler, source_refs)
2277 self.assertIn("dataset type differs", str(cm.exception))
2279 # And remove the bad definitions.
2280 for datasetTypeName in datasetTypeNames:
2281 self.target_butler.registry.removeDatasetType(datasetTypeName)
2283 # Transfer without creating dataset types should fail.
2284 with self.assertRaises(KeyError):
2285 self.target_butler.transfer_from(self.source_butler, source_refs)
2287 # Transfer without creating dimensions should fail.
2288 with self.assertRaises(ConflictingDefinitionError) as cm:
2289 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True)
2290 self.assertIn("dimension", str(cm.exception))
2292 # The failed transfer above leaves registry in an inconsistent
2293 # state because the run is created but then rolled back without
2294 # the collection cache being cleared. For now force a refresh.
2295 # Can remove with DM-35498.
2296 self.target_butler.registry.refresh()
2298 # Now transfer them to the second butler, including dimensions.
2299 with self.assertLogs(level=logging.DEBUG) as log_cm:
2300 transferred = self.target_butler.transfer_from(
2301 self.source_butler,
2302 source_refs,
2303 register_dataset_types=True,
2304 transfer_dimensions=True,
2305 )
2306 self.assertEqual(len(transferred), n_expected)
2307 log_output = ";".join(log_cm.output)
2309 # A ChainedDatastore will use the in-memory datastore for mexists,
2310 # so we cannot rely on the mexists log message.
2311 self.assertIn("Number of datastore records found in source", log_output)
2312 self.assertIn("Creating output run", log_output)
2314 # Do the transfer twice to ensure that it will do nothing extra.
2315 # Only do this if purge=True because it does not work for int
2316 # dataset_id.
2317 if purge:
2318 # This should not need to register dataset types.
2319 transferred = self.target_butler.transfer_from(self.source_butler, source_refs)
2320 self.assertEqual(len(transferred), n_expected)
2322 # Also do an explicit low-level transfer to trigger some
2323 # edge cases.
2324 with self.assertLogs(level=logging.DEBUG) as log_cm:
2325 self.target_butler._datastore.transfer_from(self.source_butler._datastore, source_refs)
2326 log_output = ";".join(log_cm.output)
2327 self.assertIn("no file artifacts exist", log_output)
2329 with self.assertRaises((TypeError, AttributeError)):
2330 self.target_butler._datastore.transfer_from(self.source_butler, source_refs) # type: ignore
2332 with self.assertRaises(ValueError):
2333 self.target_butler._datastore.transfer_from(
2334 self.source_butler._datastore, source_refs, transfer="split"
2335 )
2337 # Now try to get the same refs from the new butler.
2338 for ref in source_refs:
2339 if ref not in deleted:
2340 new_metric = self.target_butler.get(ref)
2341 old_metric = self.source_butler.get(ref)
2342 self.assertEqual(new_metric, old_metric)
2344 # Now prune the run2 collection and instead create a CHAINED collection.
2345 # This should block the transfer.
2346 self.target_butler.removeRuns(["run2"], unstore=True)
2347 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED)
2348 with self.assertRaises(CollectionTypeError):
2349 # Re-importing the run1 datasets can be problematic if they
2350 # use integer IDs, so filter those out.
2351 to_transfer = [ref for ref in source_refs if ref.run == "run2"]
2352 self.target_butler.transfer_from(self.source_butler, to_transfer)
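# The high-level entry point exercised throughout this method is, in
# sketch form:
#
#     transferred = target_butler.transfer_from(
#         source_butler,
#         source_refs,
#         register_dataset_types=True,
#         transfer_dimensions=True,
#     )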
2355class ChainedDatastoreTransfers(PosixDatastoreTransfers):
2356 """Test transfers using a chained datastore."""
2358 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
2361class NullDatastoreTestCase(unittest.TestCase):
2362 """Test that we can fall back to a null datastore."""
2364 # Need a good config to create the repo.
2365 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
2366 storageClassFactory: StorageClassFactory
2368 @classmethod
2369 def setUpClass(cls) -> None:
2370 cls.storageClassFactory = StorageClassFactory()
2371 cls.storageClassFactory.addFromConfig(cls.configFile)
2373 def setUp(self) -> None:
2374 """Create a new butler root for each test."""
2375 self.root = makeTestTempDir(TESTDIR)
2376 Butler.makeRepo(self.root, config=Config(self.configFile))
2378 def tearDown(self) -> None:
2379 removeTestTempDir(self.root)
2381 def test_fallback(self) -> None:
2382 # Read the butler config and mess with the datastore section.
2383 bad_config = Config(os.path.join(self.root, "butler.yaml"))
2384 bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore"
2386 with self.assertRaises(RuntimeError):
2387 Butler(bad_config)
2389 butler = Butler(bad_config, writeable=True, without_datastore=True)
2390 self.assertIsInstance(butler._datastore, NullDatastore)
2392 # Check that registry is working.
2393 butler.registry.registerRun("MYRUN")
2394 collections = butler.registry.queryCollections(...)
2395 self.assertIn("MYRUN", set(collections))
2397 # Create a ref.
2398 dimensions = butler.dimensions.extract([])
2399 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
2400 datasetTypeName = "metric"
2401 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
2402 butler.registry.registerDatasetType(datasetType)
2403 ref = DatasetRef(datasetType, {}, run="MYRUN")
2405 # Check that datastore will complain.
2406 with self.assertRaises(FileNotFoundError):
2407 butler.get(ref)
2408 with self.assertRaises(FileNotFoundError):
2409 butler.getURI(ref)
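# In sketch form, the fallback exercised here is:
#
#     butler = Butler(bad_config, writeable=True, without_datastore=True)
#
# which yields a fully functional registry while datastore reads such as
# get() and getURI() raise FileNotFoundError, as asserted above.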
2412def setup_module(module: types.ModuleType) -> None:
2413 """Set up the module for pytest."""
2414 clean_environment()
2417if __name__ == "__main__":
2418 clean_environment()
2419 unittest.main()