# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""
25import logging
26import os
27import posixpath
28import unittest
29import tempfile
30import shutil
31import pickle
32import string
33import random
34import time
35import socket

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported.
        """
        return cls
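
# When moto is importable, decorating a test class with @mock_s3 redirects
# boto3 S3 calls to an in-memory fake for the duration of each test.  The
# fallback above keeps this module importable without moto; the S3 test case
# further below is then skipped anyway via @unittest.skipIf(not boto3, ...).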

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
from threading import Thread
from tempfile import gettempdir
from lsst.utils import doImport
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef, DatasetIdGenEnum
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler import ButlerURI
from lsst.daf.butler import script
from lsst.daf.butler.registry import MissingCollectionError, ConflictingDefinitionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core._butlerUri.s3utils import (setAwsEnvCredentials,
                                                     unsetAwsEnvCredentials)
from lsst.daf.butler.core._butlerUri.http import isWebdavEndpoint

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )
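
# Shape of the example metric, inferred from the constructor call above and
# the attribute access in the tests below: a "summary" mapping, a nested
# "output" structure, and a "data" list, so e.g.
#     makeExampleMetrics().summary["AM1"] == 5.2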


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in other test cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations.
    """

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest"
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1, "datetime_begin": visit_start,
                                                      "datetime_end": visit_end})

        # Add a second visit for some later tests
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 424,
                                                      "name": "fourtwentyfour", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists.  Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric,
                                             collections=this_run)

                # Can the artifacts themselves be retrieved?
                if not butler.datastore.isEphemeral:
                    root_uri = ButlerURI(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts([ref], destination,
                                                               preserve_path=preserve_path, transfer="copy")
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ButlerURI.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)

                            # When the path is not preserved there should not
                            # be any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)
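
                        # With preserve_path=True each artifact keeps its
                        # datastore-relative subdirectories under the
                        # destination (e.g., hypothetically,
                        # "put_run_1/test_metric/...-423.yaml"); with False
                        # everything is flattened into the destination
                        # directory, hence the separator-count checks above.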

                        primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(len(artifacts), n_uris, "Comparing expected artifacts vs actual:"
                                         f" {artifacts} vs {primary_uri} and {secondary_uris}")

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts([ref], destination,
                                                                         preserve_path=preserve_path,
                                                                         overwrite=True)
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know they are
                # empty.
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId,
                                   parameters={"slice": slice(stop)})
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present
        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original run collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(
            butler2.collections,
            CollectionSearch.fromExpression(["other"])
        )
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ButlerURI)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
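
        # A predicted URI points at an artifact that has not been written:
        # the location the datastore would use on put, marked with a
        # "#predicted" fragment, e.g. (hypothetical form)
        # ".../ingest/test_metric/DummyCamComp_424.pickle#predicted".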

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ButlerURI)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ButlerURI)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying can still return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component", "random_data", "random_data_2"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component", "random_data", "random_data_2"])

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "band": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""

        butler = Butler(self.tmpConfigFile, run="ingest")

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp",
                                                         "id": 1, "full_name": "det1"})

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData("exposure", {"instrument": "DummyCamComp",
                                                             "id": i, "obs_id": f"exp{i}",
                                                             "seq_num": i, "day_obs": dayobs,
                                                             "physical_filter": "d-r"})

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i,
                      "other": "metric",
                      "list": [2*x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "detector": 1, "instrument": "DummyCamComp",
                      "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ButlerURI(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "band": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
                        f"Checking existence of {uri}")

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo.  It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again.  This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements
                # even though there aren't any in these datasets or in the
                # database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand.
                # Functions in the script folder are generally considered
                # protected and should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(importDir, export_file=f, directory=exportDir,
                                        transfer="auto", skip_dimensions=None, reuse_ids=False)
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(list(importButler.registry.queryDimensionRecords("skymap")),
                                 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)])

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())
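
        # Net effect verified above: removeRuns(..., unstore=True) deletes
        # the underlying artifact as well as the records, while
        # unstore=False only forgets the records and leaves the file behind.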


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler."""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testExportTransferCopy(self):
        """Test local export using several transfer modes."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path),
                            f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml",
                                         transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(self.checkFileExists(exportDir, path),
                                    f"Check that mode {transfer} exported files")

    def testPruneDatasets(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Simple prune.
        butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
        with self.assertRaises(LookupError):
            butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1)

        # Put data back.
        ref1 = butler.put(metric, ref1.unresolved(), run=run1)
        ref2 = butler.put(metric, ref2.unresolved(), run=run2)
        ref3 = butler.put(metric, ref3.unresolved(), run=run1)

        # Check that in normal mode, deleting the record will lead to
        # trash not touching the file.
        uri1 = butler.datastore.getURI(ref1)
        butler.datastore.bridge.moveToTrash([ref1])  # Update the dataset_location table
        butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id})
        butler.datastore.trash(ref1)
        butler.datastore.emptyTrash()
        self.assertTrue(uri1.exists())
        uri1.remove()  # Clean it up.

        # Simulate execution butler setup by deleting the datastore
        # record but keeping the file around and trusting.
        butler.datastore.trustGetRequest = True
        uri2 = butler.datastore.getURI(ref2)
        uri3 = butler.datastore.getURI(ref3)
        self.assertTrue(uri2.exists())
        self.assertTrue(uri3.exists())

        # Remove the datastore record.
        butler.datastore.bridge.moveToTrash([ref2])  # Update the dataset_location table
        butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id})
        self.assertTrue(uri2.exists())
        butler.datastore.trash([ref2, ref3])
        # Immediate removal for the ref2 file
        self.assertFalse(uri2.exists())
        # But ref3 has to wait for the empty.
        self.assertTrue(uri3.exists())
        butler.datastore.emptyTrash()
        self.assertFalse(uri3.exists())
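
        # Two-phase deletion exercised above: with trust mode on and the
        # datastore record already gone, trash() removes the file at once
        # (ref2); a ref that still has a record waits for emptyTrash()
        # (ref3).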

        # Clear out the datasets from registry.
        butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler."""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"

    def testIngest(self):
        # In-memory datastores cannot ingest files.
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler."""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
    datastoreName = ["InMemoryDatastore@", f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        os.makedirs(self.dir2, exist_ok=True)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")
1328class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
1329 """Test that a config file created by makeRepo outside of repo works."""
1331 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1333 def setUp(self):
1334 self.root = makeTestTempDir(TESTDIR)
1335 self.root2 = makeTestTempDir(TESTDIR)
1337 self.tmpConfigFile = self.root2
1338 Butler.makeRepo(self.root, config=Config(self.configFile),
1339 outfile=self.tmpConfigFile)
1341 def testConfigExistence(self):
1342 # Append the yaml file name, since otherwise the Config constructor
1343 # does not know the file type.
1344 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
1345 super().testConfigExistence()
1348class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
1349 """Test that a config file created by makeRepo outside of repo works."""
1351 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1353 def setUp(self):
1354 self.root = makeTestTempDir(TESTDIR)
1355 self.root2 = makeTestTempDir(TESTDIR)
1357 self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
1358 Butler.makeRepo(self.root, config=Config(self.configFile),
1359 outfile=self.tmpConfigFile)
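
# The three test cases above exercise the outfile argument of
# Butler.makeRepo as a plain file path, a bare directory, and a URI
# respectively. A minimal sketch of the call, with hypothetical paths:
#
#     Butler.makeRepo("/repo", config=Config("butler.yaml"),
#                     outfile="/elsewhere/different.yaml")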
1362@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
1363@mock_s3
1364class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1365 """S3Datastore specialization of a butler; an S3 storage Datastore +
1366 a local in-memory SqlRegistry.
1367 """
1368 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
1369 fullConfigKey = None
1370 validationCanFail = True
1372 bucketName = "anybucketname"
1373 """Name of the Bucket that will be used in the tests. The name is read from
1374 the config file used with the tests during set-up.
1375 """
1377 root = "butlerRoot/"
1378 """Root repository directory expected to be used in case useTempRoot=False.
1379 Otherwise the root is set to a 20 characters long randomly generated string
1380 during set-up.
1381 """
1383 datastoreStr = [f"datastore={root}"]
1384 """Contains all expected root locations in a format expected to be
1385 returned by Butler stringification.
1386 """
1388 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
1389 """The expected format of the S3 Datastore string."""
1391 registryStr = "/gen3.sqlite3"
1392 """Expected format of the Registry string."""
1394 def genRoot(self):
1395 """Returns a random string of len 20 to serve as a root
1396 name for the temporary bucket repo.
1398 This is equivalent to tempfile.mkdtemp as this is what self.root
1399 becomes when useTempRoot is True.
1400 """
1401 rndstr = "".join(
1402 random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
1403 )
1404 return rndstr + "/"
1406 def setUp(self):
1407 config = Config(self.configFile)
1408 uri = ButlerURI(config[".datastore.datastore.root"])
1409 self.bucketName = uri.netloc
1411 # Set up some fake credentials if they do not exist
1412 self.usingDummyCredentials = setAwsEnvCredentials()
1414 if self.useTempRoot:
1415 self.root = self.genRoot()
1416 rooturi = f"s3://{self.bucketName}/{self.root}"
1417 config.update({"datastore": {"datastore": {"root": rooturi}}})
1419 # Need a local folder to store the registry database
1420 self.reg_dir = makeTestTempDir(TESTDIR)
1421 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
1423 # moto needs to know that we expect the bucket to exist
1424 # (the name used to be the class attribute bucketName)
1425 s3 = boto3.resource("s3")
1426 s3.create_bucket(Bucket=self.bucketName)
1428 self.datastoreStr = f"datastore={self.root}"
1429 self.datastoreName = [f"FileDatastore@{rooturi}"]
1430 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
1431 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")
1433 def tearDown(self):
1434 s3 = boto3.resource("s3")
1435 bucket = s3.Bucket(self.bucketName)
1436 try:
1437 bucket.objects.all().delete()
1438 except botocore.exceptions.ClientError as e:
1439 if e.response["Error"]["Code"] == "404":
1440 # the key was not reachable - pass
1441 pass
1442 else:
1443 raise
1445 bucket = s3.Bucket(self.bucketName)
1446 bucket.delete()
1448 # unset any potentially set dummy credentials
1449 if self.usingDummyCredentials:
1450 unsetAwsEnvCredentials()
1452 if self.reg_dir is not None and os.path.exists(self.reg_dir):
1453 shutil.rmtree(self.reg_dir, ignore_errors=True)
1455 if self.useTempRoot and os.path.exists(self.root):
1456 shutil.rmtree(self.root, ignore_errors=True)
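
# A sketch of the moto pattern this class relies on: while @mock_s3 is
# active, boto3 calls are served by an in-process fake, so the bucket
# backing the test root must first be created explicitly. The bucket
# name here is hypothetical:
#
#     @mock_s3
#     def example():
#         s3 = boto3.resource("s3")
#         s3.create_bucket(Bucket="example-bucket")
#         # any subsequent butler S3 I/O now targets the mocked bucket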
1459@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
1460# Mock required environment variables during tests
1461@unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
1462 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
1463 TESTDIR, "config/testConfigs/webdav/token"),
1464 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
1465class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
1466 """WebdavDatastore specialization of a butler; a Webdav storage Datastore +
1467 a local in-memory SqlRegistry.
1468 """
1469 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
1470 fullConfigKey = None
1471 validationCanFail = True
1473 serverName = "localhost"
1474 """Name of the server that will be used in the tests.
1475 """
1477 portNumber = 8080
1478 """Port on which the webdav server listens. Automatically chosen
1479 at setUpClass via the _getfreeport() method
1480 """
1482 root = "butlerRoot/"
1483 """Root repository directory expected to be used in case useTempRoot=False.
1484 Otherwise the root is set to a 20 characters long randomly generated string
1485 during set-up.
1486 """
1488 datastoreStr = [f"datastore={root}"]
1489 """Contains all expected root locations in a format expected to be
1490 returned by Butler stringification.
1491 """
1493 datastoreName = ["FileDatastore@https://{serverName}/{root}"]
1494 """The expected format of the WebdavDatastore string."""
1496 registryStr = "/gen3.sqlite3"
1497 """Expected format of the Registry string."""
1499 serverThread = None
1500 """Thread in which the local webdav server will run"""
1502 stopWebdavServer = False
1503 """This flag will cause the webdav server to
1504 gracefully shut down when True
1505 """
1507 def genRoot(self):
1508 """Returns a random string of len 20 to serve as a root
1509 name for the temporary bucket repo.
1511 This is equivalent to tempfile.mkdtemp as this is what self.root
1512 becomes when useTempRoot is True.
1513 """
1514 rndstr = "".join(
1515 random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
1516 )
1517 return rndstr + "/"
1519 @classmethod
1520 def setUpClass(cls):
1521 # Do the same as the inherited class
1522 cls.storageClassFactory = StorageClassFactory()
1523 cls.storageClassFactory.addFromConfig(cls.configFile)
1525 cls.portNumber = cls._getfreeport()
1526 # Run a local webdav server on which tests will be run
1527 cls.serverThread = Thread(target=cls._serveWebdav,
1528 args=(cls, cls.portNumber, lambda: cls.stopWebdavServer),
1529 daemon=True)
1530 cls.serverThread.start()
1531 # Wait for it to start
1532 time.sleep(3)
1534 @classmethod
1535 def tearDownClass(cls):
1536 # Ask for graceful shut down of the webdav server
1537 cls.stopWebdavServer = True
1538 # Wait for the thread to exit
1539 cls.serverThread.join()
1541 # Mock required environment variables during tests
1542 @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
1543 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
1544 TESTDIR, "config/testConfigs/webdav/token"),
1545 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
1546 def setUp(self):
1547 config = Config(self.configFile)
1549 if self.useTempRoot:
1550 self.root = self.genRoot()
1551 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
1552 config.update({"datastore": {"datastore": {"root": self.rooturi}}})
1554 # Need a local folder to store the registry database
1555 self.reg_dir = makeTestTempDir(TESTDIR)
1556 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"
1558 self.datastoreStr = f"datastore={self.root}"
1559 self.datastoreName = [f"FileDatastore@{self.rooturi}"]
1561 if not isWebdavEndpoint(self.rooturi):
1562 raise OSError("Webdav server not running properly: cannot run tests.")
1564 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
1565 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")
1567 # Mock required environment variables during tests
1568 @unittest.mock.patch.dict(os.environ, {"LSST_BUTLER_WEBDAV_AUTH": "TOKEN",
1569 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(
1570 TESTDIR, "config/testConfigs/webdav/token"),
1571 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs"})
1572 def tearDown(self):
1573 # Clear temporary directory
1574 ButlerURI(self.rooturi).remove()
1575 ButlerURI(self.rooturi).session.close()
1577 if self.reg_dir is not None and os.path.exists(self.reg_dir):
1578 shutil.rmtree(self.reg_dir, ignore_errors=True)
1580 if self.useTempRoot and os.path.exists(self.root):
1581 shutil.rmtree(self.root, ignore_errors=True)
1583 def _serveWebdav(self, port: int, stopWebdavServer):
1584 """Starts a local webdav-compatible HTTP server,
1585 Listening on http://localhost:port
1586 This server only runs when this test class is instantiated,
1587 and then shuts down. Must be started is a separate thread.
1589 Parameters
1590 ----------
1591 port : `int`
1592 The port number on which the server should listen
1593 """
1594 root_path = gettempdir()
1596 config = {
1597 "host": "0.0.0.0",
1598 "port": port,
1599 "provider_mapping": {"/": root_path},
1600 "http_authenticator": {
1601 "domain_controller": None
1602 },
1603 "simple_dc": {"user_mapping": {"*": True}},
1604 "verbose": 0,
1605 }
1606 app = WsgiDAVApp(config)
1608 server_args = {
1609 "bind_addr": (config["host"], config["port"]),
1610 "wsgi_app": app,
1611 }
1612 server = wsgi.Server(**server_args)
1613 server.prepare()
1615 try:
1616 # Start the actual server in a separate thread
1617 t = Thread(target=server.serve, daemon=True)
1618 t.start()
1619 # Watch stopWebdavServer and gracefully shut down
1620 # the server when it returns True
1621 while True:
1622 if stopWebdavServer():
1623 break
1624 time.sleep(1)
1625 except KeyboardInterrupt:
1626 print("Caught Ctrl-C, shutting down...")
1627 finally:
1628 server.stop()
1629 t.join()
1631 def _getfreeport():
1632 """
1633 Determines a free port using sockets.
1634 """
1635 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1636 free_socket.bind(("0.0.0.0", 0))
1637 free_socket.listen()
1638 port = free_socket.getsockname()[1]
1639 free_socket.close()
1640 return port
1643class PosixDatastoreTransfers(unittest.TestCase):
1644 """Test data transfers between butlers.
1646 Tests the different dataset ID managers: UUID to UUID and integer
1647 to integer are tested. UUID to integer is not supported, since we
1648 do not currently want to allow that. Integer to UUID is supported,
1649 with the caveat that a UUID4 will be generated; this is incorrect
1650 for raw dataset types, and the test ignores that.
1651 """
1653 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
1655 @classmethod
1656 def setUpClass(cls):
1657 cls.storageClassFactory = StorageClassFactory()
1658 cls.storageClassFactory.addFromConfig(cls.configFile)
1660 def setUp(self):
1661 self.root = makeTestTempDir(TESTDIR)
1662 self.config = Config(self.configFile)
1664 def tearDown(self):
1665 removeTestTempDir(self.root)
1667 def create_butler(self, manager, label):
1668 config = Config(self.configFile)
1669 config["registry", "managers", "datasets"] = manager
1670 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config),
1671 writeable=True)
1673 def create_butlers(self, manager1, manager2):
1674 self.source_butler = self.create_butler(manager1, "1")
1675 self.target_butler = self.create_butler(manager2, "2")
1677 def testTransferUuidToUuid(self):
1678 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1679 "ByDimensionsDatasetRecordStorageManagerUUID",
1680 "lsst.daf.butler.registry.datasets.byDimensions."
1681 "ByDimensionsDatasetRecordStorageManagerUUID",
1682 )
1683 # Setting id_gen_map should have no effect here
1684 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})
1686 def testTransferIntToInt(self):
1687 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1688 "ByDimensionsDatasetRecordStorageManager",
1689 "lsst.daf.butler.registry.datasets.byDimensions."
1690 "ByDimensionsDatasetRecordStorageManager",
1691 )
1692 # int dataset ID only allows UNIQUE
1693 self.assertButlerTransfers()
1695 def testTransferIntToUuid(self):
1696 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1697 "ByDimensionsDatasetRecordStorageManager",
1698 "lsst.daf.butler.registry.datasets.byDimensions."
1699 "ByDimensionsDatasetRecordStorageManagerUUID",
1700 )
1701 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})
1703 def testTransferMissing(self):
1704 """Test transfers where datastore records are missing.
1706 This is how execution butler works.
1707 """
1708 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1709 "ByDimensionsDatasetRecordStorageManagerUUID",
1710 "lsst.daf.butler.registry.datasets.byDimensions."
1711 "ByDimensionsDatasetRecordStorageManagerUUID",
1712 )
1714 # Configure the source butler to allow trust.
1715 self.source_butler.datastore.trustGetRequest = True
1717 self.assertButlerTransfers(purge=True)
1719 def testTransferMissingDisassembly(self):
1720 """Test transfers where datastore records are missing.
1722 This is how execution butler works.
1723 """
1724 self.create_butlers("lsst.daf.butler.registry.datasets.byDimensions."
1725 "ByDimensionsDatasetRecordStorageManagerUUID",
1726 "lsst.daf.butler.registry.datasets.byDimensions."
1727 "ByDimensionsDatasetRecordStorageManagerUUID",
1728 )
1730 # Configure the source butler to allow trust.
1731 self.source_butler.datastore.trustGetRequest = True
1733 # Test disassembly.
1734 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite")
1736 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"):
1737 """Test that a run can be transferred to another butler."""
1739 storageClass = self.storageClassFactory.getStorageClass(storageClassName)
1740 datasetTypeName = "random_data"
1742 # The test will create 3 collections, and we will want to transfer
1743 # two of those three.
1744 runs = ["run1", "run2", "other"]
1746 # Also want to use two different dataset types to ensure that
1747 # grouping works.
1748 datasetTypeNames = ["random_data", "random_data_2"]
1750 # Create the run collections in the source butler.
1751 for run in runs:
1752 self.source_butler.registry.registerCollection(run, CollectionType.RUN)
1754 # Create dimensions in both butlers (transfer will not create them).
1755 n_exposures = 30
1756 for butler in (self.source_butler, self.target_butler):
1757 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
1758 butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
1759 "name": "d-r",
1760 "band": "R"})
1761 butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp",
1762 "id": 1, "full_name": "det1"})
1764 for i in range(n_exposures):
1765 butler.registry.insertDimensionData("exposure", {"instrument": "DummyCamComp",
1766 "id": i, "obs_id": f"exp{i}",
1767 "physical_filter": "d-r"})
1769 # Create dataset types in the source butler.
1770 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
1771 for datasetTypeName in datasetTypeNames:
1772 datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
1773 self.source_butler.registry.registerDatasetType(datasetType)
1775 # Write a dataset to an unrelated run -- this will ensure that
1776 # we are rewriting integer dataset ids in the target if necessary.
1777 # Will not be relevant for UUID.
1778 run = "distraction"
1779 butler = Butler(butler=self.source_butler, run=run)
1780 butler.put(makeExampleMetrics(), datasetTypeName,
1781 exposure=1, detector=1, instrument="DummyCamComp", physical_filter="d-r")
1783 # Write some example metrics to the source
1784 butler = Butler(butler=self.source_butler)
1786 # Set of DatasetRefs that should be in the list of refs to transfer
1787 # but which will not be transferred.
1788 deleted = set()
1790 n_expected = 20 # Number of datasets expected to be transferred
1791 source_refs = []
1792 for i in range(n_exposures):
1793 # Put a third of the datasets into each collection; only retain
1794 # two thirds.
1795 index = i % 3
1796 run = runs[index]
1797 datasetTypeName = datasetTypeNames[i % 2]
1799 metric_data = {"summary": {"counter": i},
1800 "output": {"text": "metric"},
1801 "data": [2*x for x in range(i)]}
1802 metric = MetricsExample(**metric_data)
1803 dataId = {"exposure": i, "detector": 1, "instrument": "DummyCamComp", "physical_filter": "d-r"}
1804 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)
1806 # Remove the datastore record using the low-level API
1807 if purge:
1808 # Remove records for a fraction.
1809 if index == 1:
1811 # For one of these delete the file as well.
1812 # This allows the "missing" code to filter the
1813 # file out.
1814 if not deleted:
1815 primary, uris = butler.datastore.getURIs(ref)
1816 if primary:
1817 primary.remove()
1818 for uri in uris.values():
1819 uri.remove()
1820 n_expected -= 1
1821 deleted.add(ref)
1823 # Remove the datastore record.
1824 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
1826 if index < 2:
1827 source_refs.append(ref)
1828 if ref not in deleted:
1829 new_metric = butler.get(ref.unresolved(), collections=run)
1830 self.assertEqual(new_metric, metric)
1832 # Create some bad dataset types to ensure we check for inconsistent
1833 # definitions.
1834 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList")
1835 for datasetTypeName in datasetTypeNames:
1836 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass)
1837 self.target_butler.registry.registerDatasetType(datasetType)
1838 with self.assertRaises(ConflictingDefinitionError):
1839 self.target_butler.transfer_from(self.source_butler, source_refs,
1840 id_gen_map=id_gen_map)
1841 # And remove the bad definitions.
1842 for datasetTypeName in datasetTypeNames:
1843 self.target_butler.registry.removeDatasetType(datasetTypeName)
1845 # Transfer without creating dataset types should fail.
1846 with self.assertRaises(KeyError):
1847 self.target_butler.transfer_from(self.source_butler, source_refs,
1848 id_gen_map=id_gen_map)
1850 # Now transfer them to the second butler
1851 with self.assertLogs(level=logging.DEBUG) as cm:
1852 transferred = self.target_butler.transfer_from(self.source_butler, source_refs,
1853 id_gen_map=id_gen_map,
1854 register_dataset_types=True)
1855 self.assertEqual(len(transferred), n_expected)
1856 log_output = ";".join(cm.output)
1857 self.assertIn("found in datastore for chunk", log_output)
1858 self.assertIn("Creating output run", log_output)
1860 # Do the transfer twice to ensure that it will do nothing extra.
1861 # Only do this if purge=True because it does not work for int
1862 # dataset_id.
1863 if purge:
1864 # This should not need to register dataset types.
1865 transferred = self.target_butler.transfer_from(self.source_butler, source_refs,
1866 id_gen_map=id_gen_map)
1867 self.assertEqual(len(transferred), n_expected)
1869 # Also do an explicit low-level transfer to trigger some
1870 # edge cases.
1871 with self.assertLogs(level=logging.DEBUG) as cm:
1872 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs)
1873 log_output = ";".join(cm.output)
1874 self.assertIn("no file artifacts exist", log_output)
1876 with self.assertRaises(TypeError):
1877 self.target_butler.datastore.transfer_from(self.source_butler, source_refs)
1879 with self.assertRaises(ValueError):
1880 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs,
1881 transfer="split")
1883 # Now try to get the same refs from the new butler.
1884 for ref in source_refs:
1885 if ref not in deleted:
1886 unresolved_ref = ref.unresolved()
1887 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run)
1888 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run)
1889 self.assertEqual(new_metric, old_metric)
1891 # Now prune the run2 collection and create a CHAINED collection
1892 # instead. This should block the transfer.
1893 self.target_butler.pruneCollection("run2", purge=True, unstore=True)
1894 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED)
1895 with self.assertRaises(TypeError):
1896 # Re-importing the run1 datasets can be problematic if they
1897 # use integer IDs, so filter those out.
1898 to_transfer = [ref for ref in source_refs if ref.run == "run2"]
1899 self.target_butler.transfer_from(self.source_butler, to_transfer,
1900 id_gen_map=id_gen_map)
1903if __name__ == "__main__":
1904 unittest.main()