Coverage for tests/test_butler.py: 15% (1138 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""


import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import socket
import string
import tempfile
import time
import unittest
import unittest.mock  # needed for unittest.mock.patch.dict in testConstructor

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator, used when moto's mock_s3 cannot be imported."""
        return cls


try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

from tempfile import gettempdir
from threading import Thread

import astropy.time
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionSearch,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import _is_webdav_endpoint
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))



def makeExampleMetrics():
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )
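
# Note: the three positional arguments above appear to become the ``summary``,
# ``output`` and ``data`` attributes that the tests read back later (see
# assertGetComponents and the slicing checks in runPutGetTest); this mapping
# is inferred from how the tests use the object, not from the constructor.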



class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not tested in any other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")
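
        # Behavior verified above: directories given via searchPaths are
        # consulted ahead of the defaults, so values defined in
        # config/testConfigs override the matching entries from butler.yaml.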



class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests from different
    butler configurations."""

    root = None
    default_run = "ingésτ😺"
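    # The run name above deliberately mixes accented, Greek, and emoji
    # characters, presumably to exercise Unicode handling in collection
    # names throughout the registry and datastore.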


    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)
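
        # An id of None makes this an unresolved ref: butler.put() is expected
        # to assign the dataset ID itself, which is why the put with a
        # preexisting id just below must fail.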


        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # When path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler


    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
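        # A sketch of the index file being built here (hypothetical contents):
        #
        #     label: /path/to/repo/butler.yaml
        #     bad_label: s3://bucket/not_real.yaml
        #
        # DAF_BUTLER_REPOSITORY_INDEX then points at that file so that labels
        # can be used in place of repository URIs.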

        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())


    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile, refs=refs, formatter=MultiDetectorFormatter))
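
        # A single FileDataset carrying several refs ingests one file that
        # backs multiple data IDs; MultiDetectorFormatter is presumably
        # responsible for extracting each detector's slice on read, which is
        # why the two URIs compared below come out identical.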


        butler.ingest(*datasets, transfer="copy", record_validation_info=False)

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)
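
        # Equality of _config, collections, and run is used above as a proxy
        # for a faithful round-trip; the registry and datastore connections
        # are presumably re-created from the config on unpickling.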


    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not created
        # for components, but querying can still return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
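
        # Everything done inside the transaction context above should now have
        # been rolled back: the dimension records, the registry entry for the
        # dataset, and the stored file, as the assertions below verify.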

        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
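        # standalone=True writes a fully-expanded configuration into the new
        # repo, whereas the first makeRepo call above writes a "limited"
        # config that inherits defaults at runtime; the assertions below rely
        # on that difference.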

        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
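
            # Within an instrument, (day_obs, seq_num) is expected to identify
            # a unique exposure record, so the registry can rewrite this
            # dataId to the matching integer exposure ID during put().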

            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()


    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)


    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # put a dataset in each
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
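
        # removeRuns with unstore=True should also delete the underlying
        # artifact, while unstore=False merely forgets it; the URI checks
        # at the end of this test confirm the difference.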

1262 # Should be nothing in registry for either one, and datastore should 

1263 # not think either exists. 

1264 with self.assertRaises(MissingCollectionError): 

1265 butler.registry.getCollectionType(run1) 

1266 with self.assertRaises(MissingCollectionError): 

1267 butler.registry.getCollectionType(run2) 

1268 self.assertFalse(butler.datastore.exists(ref1)) 

1269 self.assertFalse(butler.datastore.exists(ref2)) 

1270 # The ref we unstored should be gone according to the URI, but the 

1271 # one we forgot should still be around. 

1272 self.assertFalse(uri1.exists()) 

1273 self.assertTrue(uri2.exists()) 

1274 

1275 

1276class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1277 """PosixDatastore specialization of a butler""" 

1278 

1279 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1280 fullConfigKey = ".datastore.formatters" 

1281 validationCanFail = True 

1282 datastoreStr = ["/tmp"] 

1283 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1284 registryStr = "/gen3.sqlite3" 

1285 

1286 def testPathConstructor(self): 

1287 """Independent test of constructor using PathLike.""" 

1288 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1289 self.assertIsInstance(butler, Butler) 

1290 

1291 # And again with a Path object with the butler yaml 

1292 path = pathlib.Path(self.tmpConfigFile) 

1293 butler = Butler(path, writeable=False) 

1294 self.assertIsInstance(butler, Butler) 

1295 

1296 # And again with a Path object without the butler yaml 

1297 # (making sure we skip it if the tmp config doesn't end 

1298 # in butler.yaml -- which is the case for a subclass) 

1299 if self.tmpConfigFile.endswith("butler.yaml"): 

1300 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1301 butler = Butler(path, writeable=False) 

1302 self.assertIsInstance(butler, Butler) 
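# Descriptive note: the config argument accepts a plain string, an
# os.PathLike, or a URI; when it points at a directory, the default
# "butler.yaml" inside that directory is assumed.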

1303 

1304 def testExportTransferCopy(self): 

1305 """Test local export using all transfer modes""" 

1306 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1307 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1308 # Test that the repo actually has at least one dataset. 

1309 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1310 self.assertGreater(len(datasets), 0) 

1311 uris = [exportButler.getURI(d) for d in datasets] 

1312 datastoreRoot = exportButler.datastore.root 

1313 

1314 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1315 

1316 for path in pathsInStore: 

1317 # Assume local file system 

1318 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1319 

1320 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1321 with safeTestTempDir(TESTDIR) as exportDir: 

1322 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1323 export.saveDatasets(datasets) 

1324 for path in pathsInStore: 

1325 self.assertTrue( 

1326 self.checkFileExists(exportDir, path), 

1327 f"Check that mode {transfer} exported files", 

1328 ) 
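# General background, not specific to this repo: "copy" duplicates
# the bytes, "link" creates hard links, and "symlink"/"relsymlink"
# create absolute/relative symbolic links, so every mode tested here
# leaves the original datastore files in place.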

1329 

1330 def testPruneDatasets(self): 

1331 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1332 butler = Butler(self.tmpConfigFile, writeable=True) 

1333 # Load registry data with dimensions to hang datasets off of. 

1334 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1335 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1336 # Add some RUN-type collections. 

1337 run1 = "run1" 

1338 butler.registry.registerRun(run1) 

1339 run2 = "run2" 

1340 butler.registry.registerRun(run2) 

1341 # put some datasets. ref1 and ref2 have the same data ID, and are in 

1342 # different runs. ref3 has a different data ID. 

1343 metric = makeExampleMetrics() 

1344 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1345 datasetType = self.addDatasetType( 

1346 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1347 ) 

1348 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1349 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1350 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1351 

1352 # Simple prune. 

1353 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1354 with self.assertRaises(LookupError): 

1355 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1356 

1357 # Put data back. 

1358 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1359 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1360 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1361 

1362 # Check that in normal mode, deleting the record will lead to 

1363 # trash not touching the file. 

1364 uri1 = butler.datastore.getURI(ref1) 

1365 butler.datastore.bridge.moveToTrash([ref1]) # Update the dataset_location table 

1366 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1367 butler.datastore.trash(ref1) 

1368 butler.datastore.emptyTrash() 

1369 self.assertTrue(uri1.exists()) 

1370 uri1.remove() # Clean it up. 

1371 

1372 # Simulate execution butler setup by deleting the datastore 

1373 # record but keeping the file around and trusting. 

1374 butler.datastore.trustGetRequest = True 

1375 uri2 = butler.datastore.getURI(ref2) 

1376 uri3 = butler.datastore.getURI(ref3) 

1377 self.assertTrue(uri2.exists()) 

1378 self.assertTrue(uri3.exists()) 

1379 

1380 # Remove the datastore record. 

1381 butler.datastore.bridge.moveToTrash([ref2]) # Update the dataset_location table 

1382 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1383 self.assertTrue(uri2.exists()) 

1384 butler.datastore.trash([ref2, ref3]) 

1385 # Immediate removal for ref2 file 

1386 self.assertFalse(uri2.exists()) 

1387 # But ref3 has to wait for the empty. 

1388 self.assertTrue(uri3.exists()) 

1389 butler.datastore.emptyTrash() 

1390 self.assertFalse(uri3.exists()) 
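# Summary of the trust-mode behaviour exercised above: when
# trustGetRequest is True and the internal record for a ref has
# already been deleted, trash() removes the artifact immediately;
# refs that still have records are only queued, and their artifacts
# persist until emptyTrash() runs.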

1391 

1392 # Clear out the datasets from registry. 

1393 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1394 

1395 def testPytypePutCoercion(self): 

1396 """Test python type coercion on Butler.get and put.""" 

1397 

1398 # Store some data with the normal example storage class. 

1399 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1400 datasetTypeName = "test_metric" 

1401 butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName) 

1402 

1403 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1404 

1405 # Put a dict and this should coerce to a MetricsExample 

1406 test_dict = {"summary": {"a": 1}, "output": {"b": 2}} 

1407 metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424) 

1408 test_metric = butler.getDirect(metric_ref) 

1409 self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample") 

1410 self.assertEqual(test_metric.summary, test_dict["summary"]) 

1411 self.assertEqual(test_metric.output, test_dict["output"]) 

1412 

1413 # Check that the put still works if a DatasetType is given with 

1414 # a definition matching this python type. 

1415 registry_type = butler.registry.getDatasetType(datasetTypeName) 

1416 this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson") 

1417 metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425) 

1418 self.assertEqual(metric2_ref.datasetType, registry_type) 

1419 

1420 # The get will return the type expected by registry. 

1421 test_metric2 = butler.getDirect(metric2_ref) 

1422 self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample") 

1423 

1424 # Make a new DatasetRef with the compatible but different DatasetType. 

1425 # This should now return a dict. 

1426 new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run) 

1427 test_dict2 = butler.getDirect(new_ref) 

1428 self.assertEqual(get_full_type_name(test_dict2), "dict") 

1429 

1430 # Get it again with the wrong dataset type definition using get() 

1431 # rather than getDirect(). This should be consistent with getDirect() 

1432 # behavior and return the type of the DatasetType. 

1433 test_dict3 = butler.get(this_type, dataId=dataId, visit=425) 

1434 self.assertEqual(get_full_type_name(test_dict3), "dict") 

1435 

1436 def testPytypeCoercion(self): 

1437 """Test python type coercion on Butler.get and put.""" 

1438 

1439 # Store some data with the normal example storage class. 

1440 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1441 datasetTypeName = "test_metric" 

1442 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1443 

1444 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1445 metric = butler.get(datasetTypeName, dataId=dataId) 

1446 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1447 

1448 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1449 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1450 

1451 # Now need to hack the registry dataset type definition. 

1452 # There is no API for this. 

1453 manager = butler.registry._managers.datasets 

1454 manager._db.update( 

1455 manager._static.dataset_type, 

1456 {"name": datasetTypeName}, 

1457 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1458 ) 
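# For context (assumed from the daf_butler Database.update API): the
# `where` argument maps column names to keys in the row dicts, so the
# row above supplies both the WHERE value (under the key held in
# datasetTypeName) and the new storage_class value.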

1459 

1460 # Force reset of dataset type cache 

1461 butler.registry.refresh() 

1462 

1463 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1464 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1465 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1466 

1467 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1468 self.assertNotEqual(type(metric_model), type(metric)) 

1469 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1470 

1471 # Put the model and read it back to show that everything now 

1472 # works as normal. 

1473 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1474 metric_model_new = butler.get(metric_ref) 

1475 self.assertEqual(metric_model_new, metric_model) 

1476 

1477 # Hack the storage class again to something that will fail on the 

1478 # get with no conversion class. 

1479 manager._db.update( 

1480 manager._static.dataset_type, 

1481 {"name": datasetTypeName}, 

1482 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1483 ) 

1484 butler.registry.refresh() 

1485 

1486 with self.assertRaises(ValueError): 

1487 butler.get(datasetTypeName, dataId=dataId) 
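# Summary of the behaviour exercised above: get() converts between
# the stored python type and the registry-defined storage class when
# a conversion is registered; with no usable conversion, as with
# StructuredDataListYaml here, it raises ValueError.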

1488 

1489 

1490class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1491 """InMemoryDatastore specialization of a butler""" 

1492 

1493 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1494 fullConfigKey = None 

1495 useTempRoot = False 

1496 validationCanFail = False 

1497 datastoreStr = ["datastore='InMemory"] 

1498 datastoreName = ["InMemoryDatastore@"] 

1499 registryStr = "/gen3.sqlite3" 

1500 

1501 def testIngest(self): 

1502 pass 

1503 

1504 

1505class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1506 """PosixDatastore specialization""" 

1507 

1508 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1509 fullConfigKey = ".datastore.datastores.1.formatters" 

1510 validationCanFail = True 

1511 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1512 datastoreName = [ 

1513 "InMemoryDatastore@", 

1514 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1515 "SecondDatastore", 

1516 ] 

1517 registryStr = "/gen3.sqlite3" 

1518 

1519 

1520class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1521 """Test that a yaml file in one location can refer to a root in another.""" 

1522 

1523 datastoreStr = ["dir1"] 

1524 # Disable the makeRepo test since we are deliberately not using 

1525 # butler.yaml as the config name. 

1526 fullConfigKey = None 

1527 

1528 def setUp(self): 

1529 self.root = makeTestTempDir(TESTDIR) 

1530 

1531 # Make a new repository in one place 

1532 self.dir1 = os.path.join(self.root, "dir1") 

1533 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1534 

1535 # Move the yaml file to a different place and add a "root" 

1536 self.dir2 = os.path.join(self.root, "dir2") 

1537 os.makedirs(self.dir2, exist_ok=True) 

1538 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1539 config = Config(configFile1) 

1540 config["root"] = self.dir1 

1541 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1542 config.dumpToUri(configFile2) 

1543 os.remove(configFile1) 

1544 self.tmpConfigFile = configFile2 

1545 

1546 def testFileLocations(self): 

1547 self.assertNotEqual(self.dir1, self.dir2) 

1548 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1549 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1550 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1551 

1552 

1553class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1554 """Test that a config file created by makeRepo outside of repo works.""" 

1555 

1556 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1557 

1558 def setUp(self): 

1559 self.root = makeTestTempDir(TESTDIR) 

1560 self.root2 = makeTestTempDir(TESTDIR) 

1561 

1562 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1563 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1564 

1565 def tearDown(self): 

1566 if os.path.exists(self.root2): 

1567 shutil.rmtree(self.root2, ignore_errors=True) 

1568 super().tearDown() 

1569 

1570 def testConfigExistence(self): 

1571 c = Config(self.tmpConfigFile) 

1572 uri_config = ResourcePath(c["root"]) 

1573 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1574 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1575 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1576 

1577 def testPutGet(self): 

1578 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1579 self.runPutGetTest(storageClass, "test_metric") 

1580 

1581 

1582class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1583 """Test that a config file created by makeRepo outside of repo works.""" 

1584 

1585 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1586 

1587 def setUp(self): 

1588 self.root = makeTestTempDir(TESTDIR) 

1589 self.root2 = makeTestTempDir(TESTDIR) 

1590 

1591 self.tmpConfigFile = self.root2 

1592 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1593 

1594 def testConfigExistence(self): 

1595 # Append the yaml file name, since otherwise the Config constructor 

1596 # does not know the file type. 

1597 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1598 super().testConfigExistence() 

1599 

1600 

1601class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1602 """Test that a config file created by makeRepo outside of repo works.""" 

1603 

1604 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1605 

1606 def setUp(self): 

1607 self.root = makeTestTempDir(TESTDIR) 

1608 self.root2 = makeTestTempDir(TESTDIR) 

1609 

1610 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1611 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1612 

1613 

1614@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1615class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1616 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1617 a local in-memory SqlRegistry. 

1618 """ 

1619 

1620 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1621 fullConfigKey = None 

1622 validationCanFail = True 

1623 

1624 bucketName = "anybucketname" 

1625 """Name of the Bucket that will be used in the tests. The name is read from 

1626 the config file used with the tests during set-up. 

1627 """ 

1628 

1629 root = "butlerRoot/" 

1630 """Root repository directory expected to be used in case useTempRoot=False. 

1631 Otherwise the root is set to a 20 characters long randomly generated string 

1632 during set-up. 

1633 """ 

1634 

1635 datastoreStr = [f"datastore={root}"] 

1636 """Contains all expected root locations in a format expected to be 

1637 returned by Butler stringification. 

1638 """ 

1639 

1640 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1641 """The expected format of the S3 Datastore string.""" 

1642 

1643 registryStr = "/gen3.sqlite3" 

1644 """Expected format of the Registry string.""" 

1645 

1646 mock_s3 = mock_s3() 

1647 """The mocked s3 interface from moto.""" 

1648 

1649 def genRoot(self): 

1650 """Returns a random string of len 20 to serve as a root 

1651 name for the temporary bucket repo. 

1652 

1653 This is equivalent to tempfile.mkdtemp as this is what self.root 

1654 becomes when useTempRoot is True. 

1655 """ 

1656 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1657 return rndstr + "/" 

1658 

1659 def setUp(self): 

1660 config = Config(self.configFile) 

1661 uri = ResourcePath(config[".datastore.datastore.root"]) 

1662 self.bucketName = uri.netloc 

1663 

1664 # Enable S3 mocking of tests. 

1665 self.mock_s3.start() 

1666 

1667 # set up some fake credentials if they do not exist 

1668 self.usingDummyCredentials = setAwsEnvCredentials() 

1669 

1670 if self.useTempRoot: 

1671 self.root = self.genRoot() 

1672 rooturi = f"s3://{self.bucketName}/{self.root}" 

1673 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1674 

1675 # need local folder to store registry database 

1676 self.reg_dir = makeTestTempDir(TESTDIR) 

1677 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1678 

1679 # Moto needs to know that we expect the bucket to exist 

1680 # (its name used to be the class attribute bucketName). 

1681 s3 = boto3.resource("s3") 

1682 s3.create_bucket(Bucket=self.bucketName) 

1683 

1684 self.datastoreStr = f"datastore={self.root}" 

1685 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1686 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1687 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 
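# Descriptive note: with the moto mock active, every boto3/botocore
# call above and in the tests is intercepted locally, so no real AWS
# endpoint or credentials are ever contacted.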

1688 

1689 def tearDown(self): 

1690 s3 = boto3.resource("s3") 

1691 bucket = s3.Bucket(self.bucketName) 

1692 try: 

1693 bucket.objects.all().delete() 

1694 except botocore.exceptions.ClientError as e: 

1695 if e.response["Error"]["Code"] == "404": 

1696 # the key was not reachable - pass 

1697 pass 

1698 else: 

1699 raise 

1700 

1701 bucket = s3.Bucket(self.bucketName) 

1702 bucket.delete() 

1703 

1704 # Stop the S3 mock. 

1705 self.mock_s3.stop() 

1706 

1707 # unset any potentially set dummy credentials 

1708 if self.usingDummyCredentials: 

1709 unsetAwsEnvCredentials() 

1710 

1711 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1712 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1713 

1714 if self.useTempRoot and os.path.exists(self.root): 

1715 shutil.rmtree(self.root, ignore_errors=True) 

1716 

1717 

1718@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!") 

1719class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1720 """WebdavDatastore specialization of a butler; a Webdav storage Datastore + 

1721 a local in-memory SqlRegistry. 

1722 """ 

1723 

1724 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml") 

1725 fullConfigKey = None 

1726 validationCanFail = True 

1727 

1728 serverName = "localhost" 

1729 """Name of the server that will be used in the tests. 

1730 """ 

1731 

1732 portNumber = 8080 

1733 """Port on which the webdav server listens. Automatically chosen 

1734 at setUpClass via the _getfreeport() method 

1735 """ 

1736 

1737 root = "butlerRoot/" 

1738 """Root repository directory expected to be used in case useTempRoot=False. 

1739 Otherwise the root is set to a 20 characters long randomly generated string 

1740 during set-up. 

1741 """ 

1742 

1743 datastoreStr = [f"datastore={root}"] 

1744 """Contains all expected root locations in a format expected to be 

1745 returned by Butler stringification. 

1746 """ 

1747 

1748 datastoreName = ["FileDatastore@https://{serverName}/{root}"] 

1749 """The expected format of the WebdavDatastore string.""" 

1750 

1751 registryStr = "/gen3.sqlite3" 

1752 """Expected format of the Registry string.""" 

1753 

1754 serverThread = None 

1755 """Thread in which the local webdav server will run""" 

1756 

1757 stopWebdavServer = False 

1758 """This flag will cause the webdav server to 

1759 gracefully shut down when True 

1760 """ 

1761 

1762 def genRoot(self): 

1763 """Returns a random string of len 20 to serve as a root 

1764 name for the temporary bucket repo. 

1765 

1766 This is equivalent to tempfile.mkdtemp as this is what self.root 

1767 becomes when useTempRoot is True. 

1768 """ 

1769 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1770 return rndstr + "/" 

1771 

1772 @classmethod 

1773 def setUpClass(cls): 

1774 # Do the same as inherited class 

1775 cls.storageClassFactory = StorageClassFactory() 

1776 cls.storageClassFactory.addFromConfig(cls.configFile) 

1777 

1778 cls.portNumber = cls._getfreeport() 

1779 # Run a local webdav server on which tests will be run 

1780 cls.serverThread = Thread( 

1781 target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True 

1782 ) 

1783 cls.serverThread.start() 

1784 # Wait for it to start 

1785 time.sleep(3) 

1786 

1787 @classmethod 

1788 def tearDownClass(cls): 

1789 # Ask for graceful shut down of the webdav server 

1790 cls.stopWebdavServer = True 

1791 # Wait for the thread to exit 

1792 cls.serverThread.join() 

1793 

1794 def setUp(self): 

1795 config = Config(self.configFile) 

1796 

1797 if self.useTempRoot: 

1798 self.root = self.genRoot() 

1799 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}" 

1800 config.update({"datastore": {"datastore": {"root": self.rooturi}}}) 

1801 

1802 # need local folder to store registry database 

1803 self.reg_dir = makeTestTempDir(TESTDIR) 

1804 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1805 

1806 self.datastoreStr = f"datastore={self.root}" 

1807 self.datastoreName = [f"FileDatastore@{self.rooturi}"] 

1808 

1809 if not _is_webdav_endpoint(self.rooturi): 

1810 raise OSError("Webdav server not running properly: cannot run tests.") 

1811 

1812 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False) 

1813 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml") 

1814 

1815 def tearDown(self): 

1816 # Clear temporary directory 

1817 ResourcePath(self.rooturi).remove() 

1818 ResourcePath(self.rooturi).session.close() 

1819 

1820 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1821 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1822 

1823 if self.useTempRoot and os.path.exists(self.root): 

1824 shutil.rmtree(self.root, ignore_errors=True) 

1825 

1826 def _serveWebdav(self, port: int, stopWebdavServer): 

1827 """Starts a local webdav-compatible HTTP server, 

1828 Listening on http://localhost:port 

1829 This server only runs when this test class is instantiated, 

1830 and then shuts down. Must be started is a separate thread. 

1831 

1832 Parameters 

1833 ---------- 

1834 port : `int` 

1835 The port number on which the server should listen 

1836 """ 

1837 root_path = gettempdir() 

1838 

1839 config = { 

1840 "host": "0.0.0.0", 

1841 "port": port, 

1842 "provider_mapping": {"/": root_path}, 

1843 "http_authenticator": {"domain_controller": None}, 

1844 "simple_dc": {"user_mapping": {"*": True}}, 

1845 "verbose": 0, 

1846 } 
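# Assumption about the WsgiDAVApp configuration, based on wsgidav's
# documented defaults: mapping "/" to root_path serves the system
# temp directory, and simple_dc with {"*": True} permits anonymous
# access, which is what these tests rely on.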

1847 app = WsgiDAVApp(config) 

1848 

1849 server_args = { 

1850 "bind_addr": (config["host"], config["port"]), 

1851 "wsgi_app": app, 

1852 } 

1853 server = wsgi.Server(**server_args) 

1854 server.prepare() 

1855 

1856 try: 

1857 # Start the actual server in a separate thread 

1858 t = Thread(target=server.serve, daemon=True) 

1859 t.start() 

1860 # watch stopWebdavServer, and gracefully 

1861 # shut down the server when True 

1862 while True: 

1863 if stopWebdavServer(): 

1864 break 

1865 time.sleep(1) 

1866 except KeyboardInterrupt: 

1867 print("Caught Ctrl-C, shutting down...") 

1868 finally: 

1869 server.stop() 

1870 t.join() 

1871 

1872 def _getfreeport(): 

1873 """ 

1874 Determines a free port using sockets. 

1875 """ 

1876 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

1877 free_socket.bind(("0.0.0.0", 0)) 

1878 free_socket.listen() 

1879 port = free_socket.getsockname()[1] 

1880 free_socket.close() 

1881 return port 
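# A well-known caveat, noted for clarity: the port is only guaranteed
# free at the moment the probe socket closes; another process could
# claim it before the webdav server binds. That race is acceptable
# for tests.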

1882 

1883 

1884class PosixDatastoreTransfers(unittest.TestCase): 

1885 """Test data transfers between butlers. 

1886 

1887 Tests cover different dataset managers: UUID to UUID and integer to 

1888 integer transfers are tested. UUID to integer is not supported, since 

1889 we do not currently want to allow that. Integer to UUID is supported, 

1890 with the caveat that UUID4 identifiers will be generated, which would 

1891 be incorrect for raw dataset types; the test ignores that. 

1892 """ 

1893 

1894 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1895 

1896 @classmethod 

1897 def setUpClass(cls): 

1898 cls.storageClassFactory = StorageClassFactory() 

1899 cls.storageClassFactory.addFromConfig(cls.configFile) 

1900 

1901 def setUp(self): 

1902 self.root = makeTestTempDir(TESTDIR) 

1903 self.config = Config(self.configFile) 

1904 

1905 def tearDown(self): 

1906 removeTestTempDir(self.root) 

1907 

1908 def create_butler(self, manager, label): 

1909 config = Config(self.configFile) 

1910 config["registry", "managers", "datasets"] = manager 

1911 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

1912 

1913 def create_butlers(self, manager1, manager2): 

1914 self.source_butler = self.create_butler(manager1, "1") 

1915 self.target_butler = self.create_butler(manager2, "2") 

1916 

1917 def testTransferUuidToUuid(self): 

1918 self.create_butlers( 

1919 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1920 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1921 ) 

1922 # Setting id_gen_map should have no effect here 

1923 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1924 

1925 def testTransferIntToInt(self): 

1926 self.create_butlers( 

1927 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1928 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1929 ) 

1930 # Integer dataset IDs only support the UNIQUE id-generation mode. 

1931 self.assertButlerTransfers() 

1932 

1933 def testTransferIntToUuid(self): 

1934 self.create_butlers( 

1935 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1936 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1937 ) 

1938 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1939 

1940 def testTransferMissing(self): 

1941 """Test transfers where datastore records are missing. 

1942 

1943 This is how execution butler works. 

1944 """ 

1945 self.create_butlers( 

1946 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1947 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1948 ) 

1949 

1950 # Configure the source butler to allow trust. 

1951 self.source_butler.datastore.trustGetRequest = True 

1952 

1953 self.assertButlerTransfers(purge=True) 

1954 

1955 def testTransferMissingDisassembly(self): 

1956 """Test transfers where datastore records are missing. 

1957 

1958 This is how execution butler works. 

1959 """ 

1960 self.create_butlers( 

1961 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1962 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1963 ) 

1964 

1965 # Configure the source butler to allow trust. 

1966 self.source_butler.datastore.trustGetRequest = True 

1967 

1968 # Test disassembly. 

1969 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

1970 

1971 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

1972 """Test that a run can be transferred to another butler.""" 

1973 

1974 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1975 datasetTypeName = "random_data" 

1976 

1977 # The test creates three run collections, and we will transfer 

1978 # two of those three. 

1979 runs = ["run1", "run2", "other"] 

1980 

1981 # Also want to use two different dataset types to ensure that 

1982 # grouping works. 

1983 datasetTypeNames = ["random_data", "random_data_2"] 

1984 

1985 # Create the run collections in the source butler. 

1986 for run in runs: 

1987 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1988 

1989 # Create dimensions in both butlers (transfer will not create them). 

1990 n_exposures = 30 

1991 for butler in (self.source_butler, self.target_butler): 

1992 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1993 butler.registry.insertDimensionData( 

1994 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1995 ) 

1996 butler.registry.insertDimensionData( 

1997 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

1998 ) 

1999 

2000 for i in range(n_exposures): 

2001 butler.registry.insertDimensionData( 

2002 "exposure", 

2003 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2004 ) 

2005 

2006 # Create dataset types in the source butler. 

2007 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"]) 

2008 for datasetTypeName in datasetTypeNames: 

2009 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2010 self.source_butler.registry.registerDatasetType(datasetType) 

2011 

2012 # Write a dataset to an unrelated run -- this will ensure that 

2013 # we are rewriting integer dataset ids in the target if necessary. 

2014 # Will not be relevant for UUID. 

2015 run = "distraction" 

2016 butler = Butler(butler=self.source_butler, run=run) 

2017 butler.put( 

2018 makeExampleMetrics(), 

2019 datasetTypeName, 

2020 exposure=1, 

2021 instrument="DummyCamComp", 

2022 physical_filter="d-r", 

2023 ) 

2024 

2025 # Write some example metrics to the source 

2026 butler = Butler(butler=self.source_butler) 

2027 

2028 # Set of DatasetRefs that should be in the list of refs to transfer 

2029 # but which will not be transferred. 

2030 deleted = set() 

2031 

2032 n_expected = 20 # Number of datasets expected to be transferred 

2033 source_refs = [] 

2034 for i in range(n_exposures): 

2035 # Put a third of the datasets into each collection; only retain 

2036 # two thirds. 

2037 index = i % 3 

2038 run = runs[index] 

2039 datasetTypeName = datasetTypeNames[i % 2] 

2040 

2041 metric_data = { 

2042 "summary": {"counter": i}, 

2043 "output": {"text": "metric"}, 

2044 "data": [2 * x for x in range(i)], 

2045 } 

2046 metric = MetricsExample(**metric_data) 

2047 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2048 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2049 

2050 # Remove the datastore record using low-level API 

2051 if purge: 

2052 # Remove records for a fraction. 

2053 if index == 1: 

2054 

2055 # For one of these delete the file as well. 

2056 # This allows the "missing" code to filter the 

2057 # file out. 

2058 if not deleted: 

2059 primary, uris = butler.datastore.getURIs(ref) 

2060 if primary: 

2061 primary.remove() 

2062 for uri in uris.values(): 

2063 uri.remove() 

2064 n_expected -= 1 

2065 deleted.add(ref) 

2066 

2067 # Remove the datastore record. 

2068 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

2069 

2070 if index < 2: 

2071 source_refs.append(ref) 

2072 if ref not in deleted: 

2073 new_metric = butler.get(ref.unresolved(), collections=run) 

2074 self.assertEqual(new_metric, metric) 

2075 

2076 # Create some bad dataset types to ensure we check for inconsistent 

2077 # definitions. 

2078 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2079 for datasetTypeName in datasetTypeNames: 

2080 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2081 self.target_butler.registry.registerDatasetType(datasetType) 

2082 with self.assertRaises(ConflictingDefinitionError): 

2083 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2084 # And remove the bad definitions. 

2085 for datasetTypeName in datasetTypeNames: 

2086 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2087 

2088 # Transfer without creating dataset types should fail. 

2089 with self.assertRaises(KeyError): 

2090 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2091 

2092 # Now transfer them to the second butler 

2093 with self.assertLogs(level=logging.DEBUG) as cm: 

2094 transferred = self.target_butler.transfer_from( 

2095 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

2096 ) 

2097 self.assertEqual(len(transferred), n_expected) 

2098 log_output = ";".join(cm.output) 

2099 self.assertIn("found in datastore for chunk", log_output) 

2100 self.assertIn("Creating output run", log_output) 

2101 

2102 # Do the transfer twice to ensure that it will do nothing extra. 

2103 # Only do this if purge=True because it does not work for int 

2104 # dataset_id. 

2105 if purge: 

2106 # This should not need to register dataset types. 

2107 transferred = self.target_butler.transfer_from( 

2108 self.source_butler, source_refs, id_gen_map=id_gen_map 

2109 ) 

2110 self.assertEqual(len(transferred), n_expected) 

2111 

2112 # Also do an explicit low-level transfer to trigger some 

2113 # edge cases. 

2114 with self.assertLogs(level=logging.DEBUG) as cm: 

2115 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2116 log_output = ";".join(cm.output) 

2117 self.assertIn("no file artifacts exist", log_output) 

2118 

2119 with self.assertRaises(TypeError): 

2120 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2121 

2122 with self.assertRaises(ValueError): 

2123 self.target_butler.datastore.transfer_from( 

2124 self.source_butler.datastore, source_refs, transfer="split" 

2125 ) 
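# Descriptive note on the two failure modes asserted above:
# Datastore.transfer_from requires another Datastore (passing a
# Butler raises TypeError), and the "split" transfer mode is rejected
# for this datastore configuration (ValueError).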

2126 

2127 # Now try to get the same refs from the new butler. 

2128 for ref in source_refs: 

2129 if ref not in deleted: 

2130 unresolved_ref = ref.unresolved() 

2131 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2132 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2133 self.assertEqual(new_metric, old_metric) 

2134 

2135 # Now prune run2 collection and create instead a CHAINED collection. 

2136 # This should block the transfer. 

2137 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

2138 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2139 with self.assertRaises(CollectionTypeError): 

2140 # Re-importing the run1 datasets can be problematic if they 

2141 # use integer IDs so filter those out. 

2142 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2143 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 

2144 

2145 

2146if __name__ == "__main__": 2146 ↛ 2147line 2146 didn't jump to line 2147, because the condition on line 2146 was never true

2147 unittest.main()