# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Tests for Butler.
23"""

import os
import posixpath
import unittest
import tempfile
import shutil
import pickle
import string
import random
import numpy as np

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto's mock_s3 cannot be imported.
        """
        return cls
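
# With this fallback in place, applying ``@mock_s3`` is a harmless no-op when
# moto is unavailable; the S3-backed tests are additionally skipped via the
# ``@unittest.skipIf(not boto3, ...)`` decorator further down.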

from lsst.utils import doImport
from lsst.daf.butler.core.safeFileIo import safeMakeDir
from lsst.daf.butler import Butler, Config, ButlerConfig
from lsst.daf.butler import StorageClassFactory
from lsst.daf.butler import DatasetType, DatasetRef
from lsst.daf.butler import FileTemplateValidationError, ValidationError
from lsst.daf.butler import FileDataset
from lsst.daf.butler import CollectionSearch, CollectionType
from lsst.daf.butler.registry import MissingCollectionError
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.core.location import ButlerURI
from lsst.daf.butler.core.s3utils import (s3CheckFileExists, setAwsEnvCredentials,
                                          unsetAwsEnvCredentials)

from lsst.daf.butler.tests import MultiDetectorFormatter, MetricsExample

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          [563, 234, 456.7, 752, 8, 9, 27]
                          )
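
# The three positional arguments passed above become the ``summary``,
# ``output`` and ``data`` attributes of the MetricsExample instance that the
# tests below assert against.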


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """
    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests of ButlerConfig behavior not covered by the other test
    cases.
    """

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests from different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it.
        """
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference):
        datasetTypeName = datasetRef.datasetType.name
        dataId = datasetRef.dataId
        for component in components:
            compTypeName = DatasetType.nameWithComponent(datasetTypeName, component)
            result = butler.get(compTypeName, dataId)
            self.assertEqual(result, getattr(reference, component))

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def runPutGetTest(self, storageClass, datasetTypeName):
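        """Put a dataset of the given storage class and dataset type name
        into the repository, read it back in several ways, and exercise
        dataset pruning.

        The populated butler is returned so that callers can reuse the
        repository contents.
        """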
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest/run"
        tag = "ingest"
        butler = Butler(self.tmpConfigFile, run=run, collections=[tag], tags=[tag])

        # The run and tagged collections should have been registered by the
        # Butler constructor.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run, tag]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit_system", {"instrument": "DummyCamComp",
                                                             "id": 1,
                                                             "name": "default"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r",
                                                      "visit_system": 1})

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            with self.subTest(args=args):
                ref = butler.put(metric, *args)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(butler, ref,
                                             ("summary", "data", "output"), metric)

                # Remove from the tagged collection only; after that we
                # shouldn't be able to find it unless we use the dataset_id.
                butler.pruneDatasets([ref])
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)
                # If we use the output ref with the dataset_id, we should
                # still be able to load it with getDirect().
                self.assertEqual(metric, butler.getDirect(ref))

                # Reinsert into collection, then delete from Datastore *and*
                # remove from collection.
                butler.registry.associate(tag, [ref])
                butler.pruneDatasets([ref], unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # Now getDirect() should fail, too.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry still knows about it, if we use the dataset_id.
                self.assertEqual(butler.registry.getDataset(ref.id), ref)

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

        # Put the dataset again, since the last thing we did was remove it.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Delete one component and check that the other components
            # can still be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = DatasetType.nameWithComponent(datasetTypeName, "summary")
            compNameD = DatasetType.nameWithComponent(datasetTypeName, "data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            self.assertTrue(butler.datastore.exists(ref.components["summary"]))

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            butler.pruneDatasets([compRef], unstore=True)
            with self.assertRaises(LookupError):
                butler.datasetExists(compNameS, dataId)
            self.assertFalse(butler.datastore.exists(ref.components["summary"]))
            self.assertTrue(butler.datastore.exists(ref.components["data"]))
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

        # Create a DatasetType that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(datasetTypeName, dimensions,
                                              self.storageClassFactory.getStorageClass("Config"))

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run, tag})

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add a dataset back in since some downstream tests require
        # something to be present
        ref = butler.put(metric, refIn)

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType("example", dimensions,
                                          self.storageClassFactory.getStorageClass("StructuredData"),
                                          butler.registry)
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"})
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        butler.registry.registerRun(run)
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler.
    """
    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        if self.useTempRoot:
            self.root = tempfile.mkdtemp(dir=TESTDIR)
            Butler.makeRepo(self.root, config=Config(self.configFile))
            self.tmpConfigFile = os.path.join(self.root, "butler.yaml")
        else:
            self.root = None
            self.tmpConfigFile = self.configFile

    def testConstructor(self):
        """Independent test of constructor.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.registry, butler2.registry)
        self.assertIs(butler.datastore, butler2.datastore)

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runPutGetTest(storageClass, "test_metric_comp")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        for detector in (1, 2):
            butler.registry.insertDimensionData("detector", {"instrument": "DummyCamComp", "id": detector,
                                                             "full_name": f"detector{detector}"})

        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "fourtwentythree", "physical_filter": "d-r"},
                                            {"instrument": "DummyCamComp", "id": 424,
                                             "name": "fourtwentyfour", "physical_filter": "d-r"})

        formatter = doImport("lsst.daf.butler.formatters.yamlFormatter.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile,
                                        refs=[refIn],
                                        formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single-file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile,
                                    refs=refs,
                                    formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs: both data IDs refer to the same single file
        uri1 = butler.getUri(datasetTypeName, dataId1)
        uri2 = butler.getUri(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2)

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory datastores cannot
        # ingest files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
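        """Exercise pruneCollection for RUN, TAGGED, and CHAINED collections,
        checking registry contents and datastore existence after each step.
        """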
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets.  ref1 and ref2 have the same data ID, and are in
        # different runs.  ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType("prune_collections_test_dataset", dimensions, storageClass,
                                          butler.registry)
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)
        # Trying to delete a RUN collection without purge, or with purge but
        # not unstore, should fail.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2.  It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Trying to delete the RUN collections should fail with complete
        # rollback because they are still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Trying to delete CHAINED and TAGGED collections with purge should
        # not work either.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False.  This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.exists(ref3))
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Delete the chain with unstore=False.  The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertTrue(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Redefine and then delete the chain with unstore=True.  Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref1, ref2, ref3])
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertFalse(butler.datastore.exists(ref3))
        # Remove run1.  This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2.  This removes ref2 from the registry and the
        # datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)),
                              [])

    def testPickle(self):
        """Test pickle support.
        """
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}, {"instrument": "DummyHSC"},
             {"instrument": "DummyCamComp"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"})
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are created
        # for each component.  We need entries for each component in the test
        # configuration, otherwise validation won't work.  The ones that
        # are deliberately broken will be ignored later.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation, but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation, but ignore the bad datasetType
        butler.validateConfiguration(ignore=["test_metric_comp", "metric3", "calexp", "DummySC",
                                             "datasetType.component"])

    def testTransaction(self):
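        """Check that a failure inside a butler transaction rolls back both
        registry and datastore changes.
        """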
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (("instrument", {"instrument": "DummyCam"}),
                            ("physical_filter", {"instrument": "DummyCam", "name": "d-r",
                                                 "abstract_filter": "R"}),
                            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo",
                                       "physical_filter": "d-r"}))
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Create and register a DatasetType
                datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass,
                                                  butler.registry)
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref,
                                         ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")

        with self.assertRaises(KeyError):
            butler.registry.getDatasetType(datasetTypeName)
        with self.assertRaises(LookupError):
            butler.registry.expandDataId(dataId)
        # Should raise KeyError for missing DatasetType
        with self.assertRaises(KeyError):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if the Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Remove the file created in setUp
        os.unlink(self.tmpConfigFile)

        butlerConfig = Butler.makeRepo(self.root, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                                       config=Config(self.configFile), overwrite=True)
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, createRegistry=False,
                            config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


class FileLikeDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileLikeDatastore.
    """

    def checkFileExists(self, root, path):
        """Check if a file exists at the given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.  For PosixDatastore this test is
        equivalent to an `os.path.exists` call.
        """
        return os.path.exists(os.path.join(root, path))

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData("physical_filter", {"instrument": "DummyCamComp",
                                                                "name": "d-r",
                                                                "abstract_filter": "R"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 423,
                                                      "name": "v423", "physical_filter": "d-r"})
        butler.registry.insertDimensionData("visit", {"instrument": "DummyCamComp", "id": 425,
                                                      "name": "v425", "physical_filter": "d-r"})

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create three almost-identical DatasetTypes (all will use the
        # default template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": np.int64(423)}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}
        dataId3 = {"instrument": "DummyCamComp", "visit": 425}
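        # dataId1 deliberately uses a numpy integer for "visit" so that the
        # template machinery is also exercised with non-native integer types.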

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric1/d-r/DummyCamComp_423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        self.assertTrue(self.checkFileExists(butler.datastore.root,
                                             "ingest/metric2/d-r/DummyCamComp_v423.pickle"))

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        ref = butler.put(metric, "metric3", dataId1)

        # Check the template based on dimensions.  This one is a bad template
        with self.assertRaises(FileTemplateValidationError):
            butler.datastore.templates.validateTemplates([ref])

        with self.assertRaises(FileExistsError):
            butler.put(metric, "metric3", dataId3)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Export those datasets.  We use TemporaryDirectory because there
        # doesn't seem to be a way to get the filename (as opposed to the
        # file object) from any of tempfile's temporary-file context
        # managers.
        with tempfile.TemporaryDirectory() as exportDir:
            # TODO: When PosixDatastore supports transfer-on-export, add
            # tests for that.
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile) as export:
                export.saveDatasets(datasets)
            self.assertTrue(os.path.exists(exportFile))
            with tempfile.TemporaryDirectory() as importDir:
                Butler.makeRepo(importDir, config=Config(self.configFile))
                importButler = Butler(importDir, run="ingest/run")
                importButler.import_(filename=exportFile, directory=exportButler.datastore.root,
                                     transfer="symlink")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType
                        # and data ID separately, to avoid lookup by
                        # dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))


class PosixDatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"PosixDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = ":memory:"

    def testIngest(self):
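        # In-memory datastores cannot ingest files, so the inherited
        # testIngest is disabled for this configuration.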
        pass


class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """ChainedDatastore specialization of a butler"""
    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/PosixDatastore_1,", "/PosixDatastore_2'"]
    datastoreName = ["InMemoryDatastore@", f"PosixDatastore@{BUTLER_ROOT_TAG}/PosixDatastore_1",
                     "SecondDatastore"]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Make a new repository in one place
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root"
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToFile(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ButlerURI(c["root"])
        uri_expected = ButlerURI(self.root)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo in a directory outside of
    the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file name, else the Config constructor does not
        # know the file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo at a URI outside of the
    repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        self.root2 = tempfile.mkdtemp(dir=TESTDIR)

        self.tmpConfigFile = ButlerURI(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile),
                        outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
@mock_s3
class S3DatastoreButlerTestCase(FileLikeDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler; an S3 storage Datastore +
    a local in-memory SqlRegistry.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests.  The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used when useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = [f"S3Datastore@s3://{bucketName}/{root}"]
    """The expected format of the S3Datastore string."""

    registryStr = ":memory:"
    """Expected format of the Registry string."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root
        name for the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(20)
        )
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ButlerURI(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Set up some fake credentials if they do not exist
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # Moto needs to know that we expect the bucket to exist
        # (this used to be the class attribute bucketName)
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"S3Datastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable - pass
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Unset any potentially set dummy credentials
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

    def checkFileExists(self, root, relpath):
        """Check if a file exists at the given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.  For S3Datastore this test is equivalent
        to a `lsst.daf.butler.core.s3utils.s3CheckFileExists` call.
        """
        uri = ButlerURI(root)
        client = boto3.client("s3")
        return s3CheckFileExists(uri, client=client)[0]

    @unittest.expectedFailure
    def testImportExport(self):
        super().testImportExport()


if __name__ == "__main__":
    unittest.main()