# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import socket
import string
import tempfile
import time
import unittest
import unittest.mock  # Needed explicitly for unittest.mock.patch.dict below.

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported."""
        return cls


try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None
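
# ``boto3 = None`` and ``WsgiDAVApp = None`` act as sentinels for the
# optional test dependencies: the S3 and WebDAV test cases elsewhere in
# this file presumably check them (e.g. with ``unittest.skipIf``) so the
# rest of the suite still runs when moto or wsgidav is not installed.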

from tempfile import gettempdir
from threading import Thread

import astropy.time
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionSearch,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import _is_webdav_endpoint
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))

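# makeExampleMetrics passes three positional blocks to MetricsExample;
# judging by the component names exercised in assertGetComponents and the
# slicing tests below, these are assumed to populate its ``summary``,
# ``output``, and ``data`` attributes respectively.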

def makeExampleMetrics():
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests of ButlerConfig that are not covered by any other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests against
    different butler configurations."""
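
    # A concrete test case mixes these helpers into a unittest.TestCase and
    # supplies the class attributes they consume (``configFile``,
    # ``tmpConfigFile``, ...), as PosixDatastoreButlerTestCase does (via
    # FileDatastoreButlerTests) near the end of this file. A minimal
    # sketch, assuming the basic POSIX config shipped with the tests:
    #
    #     class MyButlerTestCase(ButlerPutGetTests, unittest.TestCase):
    #         configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")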

    root = None
    default_run = "ingésτ😺"

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it"""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
        """Create a butler for the given run, register a dataset type, and
        insert the dimension records needed by the put/get tests."""
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to a run collection, and that same run
        # is searched when looking the datasets up again.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # When the path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know the collections
            # are empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
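        # The repository index is a simple alias-to-URI mapping; the Config
        # built below is assumed to serialize to something like:
        #
        #     label: /path/to/repo/butler.yaml
        #     bad_label: s3://bucket/not_real.yaml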

        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile, refs=refs, formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy", record_validation_info=False)

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for components, but querying can still return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
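
        # Everything inside the transaction context below - the dimension
        # inserts, the put, and the datastore write - should roll back
        # together when TransactionTestError is raised at the end.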

        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Export datasets to a temp directory and import them back into a
        new temp-directory repo. Does not assume a posix datastore."""
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())


1272class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1273 """PosixDatastore specialization of a butler""" 

1274 

1275 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1276 fullConfigKey = ".datastore.formatters" 

1277 validationCanFail = True 

1278 datastoreStr = ["/tmp"] 

1279 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1280 registryStr = "/gen3.sqlite3" 

1281 

1282 def testPathConstructor(self): 

1283 """Independent test of constructor using PathLike.""" 

1284 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1285 self.assertIsInstance(butler, Butler) 

1286 

1287 # And again with a Path object with the butler yaml 

1288 path = pathlib.Path(self.tmpConfigFile) 

1289 butler = Butler(path, writeable=False) 

1290 self.assertIsInstance(butler, Butler) 

1291 

1292 # And again with a Path object without the butler yaml 

1293 # (making sure we skip it if the tmp config doesn't end 

1294 # in butler.yaml -- which is the case for a subclass) 

1295 if self.tmpConfigFile.endswith("butler.yaml"): 

1296 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1297 butler = Butler(path, writeable=False) 

1298 self.assertIsInstance(butler, Butler) 

1299 

1300 def testExportTransferCopy(self): 

1301 """Test local export using all transfer modes""" 

1302 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1303 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1304 # Test that the repo actually has at least one dataset. 

1305 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1306 self.assertGreater(len(datasets), 0) 

1307 uris = [exportButler.getURI(d) for d in datasets] 

1308 datastoreRoot = exportButler.datastore.root 

1309 

1310 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1311 

1312 for path in pathsInStore: 

1313 # Assume local file system 

1314 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1315 

1316 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1317 with safeTestTempDir(TESTDIR) as exportDir: 

1318 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1319 export.saveDatasets(datasets) 

1320 for path in pathsInStore: 

1321 self.assertTrue( 

1322 self.checkFileExists(exportDir, path), 

1323 f"Check that mode {transfer} exported files", 

1324 ) 

1325 

1326 def testPruneDatasets(self): 

1327 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1328 butler = Butler(self.tmpConfigFile, writeable=True) 

1329 # Load registry data with dimensions to hang datasets off of. 

1330 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1331 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1332 # Add some RUN-type collections. 

1333 run1 = "run1" 

1334 butler.registry.registerRun(run1) 

1335 run2 = "run2" 

1336 butler.registry.registerRun(run2) 

1337 # Put some datasets. ref1 and ref2 have the same data ID, and are in 

1338 # different runs. ref3 has a different data ID. 

1339 metric = makeExampleMetrics() 

1340 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1341 datasetType = self.addDatasetType( 

1342 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1343 ) 

1344 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1345 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1346 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1347 

1348 # Simple prune. 

1349 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1350 with self.assertRaises(LookupError): 

1351 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1352 

1353 # Put data back. 

1354 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1355 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1356 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1357 

1358 # Check that in normal mode, deleting the record will lead to 

1359 # trash not touching the file. 

1360 uri1 = butler.datastore.getURI(ref1) 

1361 butler.datastore.bridge.moveToTrash([ref1]) # Update the dataset_location table 

1362 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1363 butler.datastore.trash(ref1) 

1364 butler.datastore.emptyTrash() 

1365 self.assertTrue(uri1.exists()) 

1366 uri1.remove() # Clean it up. 

1367 

1368 # Simulate execution butler setup by deleting the datastore 

1369 # record but keeping the file around and enabling trust mode. 

1370 butler.datastore.trustGetRequest = True 

1371 uri2 = butler.datastore.getURI(ref2) 

1372 uri3 = butler.datastore.getURI(ref3) 

1373 self.assertTrue(uri2.exists()) 

1374 self.assertTrue(uri3.exists()) 

1375 

1376 # Remove the datastore record. 

1377 butler.datastore.bridge.moveToTrash([ref2]) # Update the dataset_location table 

1378 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1379 self.assertTrue(uri2.exists()) 

1380 butler.datastore.trash([ref2, ref3]) 

1381 # Immediate removal for ref2 file 

1382 self.assertFalse(uri2.exists()) 

1383 # But ref3 has to wait for the empty. 

1384 self.assertTrue(uri3.exists()) 

1385 butler.datastore.emptyTrash() 

1386 self.assertFalse(uri3.exists()) 

1387 

1388 # Clear out the datasets from registry. 

1389 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1390 
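 # The two-stage deletion exercised above, as a commented sketch (flow
 # inferred from the assertions in this test): datastore.trash() marks an
 # artifact for removal and datastore.emptyTrash() actually deletes it;
 # with trustGetRequest=True and no datastore record, trash() removes the
 # file immediately rather than waiting for emptyTrash().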

1391 def testPytypePutCoercion(self): 

1392 """Test python type coercion on Butler.get and put.""" 

1393 

1394 # Store some data with the normal example storage class. 

1395 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1396 datasetTypeName = "test_metric" 

1397 butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName) 

1398 

1399 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1400 

1401 # Put a dict and this should coerce to a MetricsExample 

1402 test_dict = {"summary": {"a": 1}, "output": {"b": 2}} 

1403 metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424) 

1404 test_metric = butler.getDirect(metric_ref) 

1405 self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample") 

1406 self.assertEqual(test_metric.summary, test_dict["summary"]) 

1407 self.assertEqual(test_metric.output, test_dict["output"]) 

1408 

1409 # Check that the put still works if a DatasetType is given with 

1410 # a definition matching this python type. 

1411 registry_type = butler.registry.getDatasetType(datasetTypeName) 

1412 this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson") 

1413 metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425) 

1414 self.assertEqual(metric2_ref.datasetType, registry_type) 

1415 

1416 # The get will return the type expected by registry. 

1417 test_metric2 = butler.getDirect(metric2_ref) 

1418 self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample") 

1419 

1420 # Make a new DatasetRef with the compatible but different DatasetType. 

1421 # This should now return a dict. 

1422 new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run) 

1423 test_dict2 = butler.getDirect(new_ref) 

1424 self.assertEqual(get_full_type_name(test_dict2), "dict") 

1425 

1426 # Get it again with the wrong dataset type definition using get() 

1427 # rather than getDirect(). This should be consistent with getDirect() 

1428 # behavior and return the type of the DatasetType. 

1429 test_dict3 = butler.get(this_type, dataId=dataId, visit=425) 

1430 self.assertEqual(get_full_type_name(test_dict3), "dict") 

1431 

1432 def testPytypeCoercion(self): 

1433 """Test python type coercion on Butler.get and put.""" 

1434 

1435 # Store some data with the normal example storage class. 

1436 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1437 datasetTypeName = "test_metric" 

1438 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1439 

1440 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1441 metric = butler.get(datasetTypeName, dataId=dataId) 

1442 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1443 

1444 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1445 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1446 

1447 # Now need to hack the registry dataset type definition. 

1448 # There is no API for this. 

1449 manager = butler.registry._managers.datasets 

1450 manager._db.update( 

1451 manager._static.dataset_type, 

1452 {"name": datasetTypeName}, 

1453 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1454 ) 

1455 

1456 # Force reset of dataset type cache 

1457 butler.registry.refresh() 

1458 

1459 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1460 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1461 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1462 

1463 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1464 self.assertNotEqual(type(metric_model), type(metric)) 

1465 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1466 

1467 # Put the model and read it back to show that everything now 

1468 # works as normal. 

1469 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1470 metric_model_new = butler.get(metric_ref) 

1471 self.assertEqual(metric_model_new, metric_model) 

1472 

1473 # Hack the storage class again to something for which the get will 

1474 # fail because no type conversion is possible. 

1475 manager._db.update( 

1476 manager._static.dataset_type, 

1477 {"name": datasetTypeName}, 

1478 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1479 ) 

1480 butler.registry.refresh() 

1481 

1482 with self.assertRaises(ValueError): 

1483 butler.get(datasetTypeName, dataId=dataId) 

1484 

1485 

1486class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1487 """InMemoryDatastore specialization of a butler""" 

1488 

1489 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1490 fullConfigKey = None 

1491 useTempRoot = False 

1492 validationCanFail = False 

1493 datastoreStr = ["datastore='InMemory"] 

1494 datastoreName = ["InMemoryDatastore@"] 

1495 registryStr = "/gen3.sqlite3" 

1496 

1497 def testIngest(self): 
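 # Ingest exercises external file artifacts, which presumably do not
 # apply to an in-memory datastore, so the inherited test is disabled.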

1498 pass 

1499 

1500 

1501class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1502 """PosixDatastore specialization""" 

1503 

1504 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1505 fullConfigKey = ".datastore.datastores.1.formatters" 

1506 validationCanFail = True 

1507 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1508 datastoreName = [ 

1509 "InMemoryDatastore@", 

1510 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1511 "SecondDatastore", 

1512 ] 

1513 registryStr = "/gen3.sqlite3" 

1514 

1515 

1516class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1517 """Test that a yaml file in one location can refer to a root in another.""" 

1518 

1519 datastoreStr = ["dir1"] 

1520 # Disable the makeRepo test since we are deliberately not using 

1521 # butler.yaml as the config name. 

1522 fullConfigKey = None 

1523 

1524 def setUp(self): 

1525 self.root = makeTestTempDir(TESTDIR) 

1526 

1527 # Make a new repository in one place 

1528 self.dir1 = os.path.join(self.root, "dir1") 

1529 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1530 

1531 # Move the yaml file to a different place and add a "root" 

1532 self.dir2 = os.path.join(self.root, "dir2") 

1533 os.makedirs(self.dir2, exist_ok=True) 

1534 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1535 config = Config(configFile1) 

1536 config["root"] = self.dir1 

1537 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1538 config.dumpToUri(configFile2) 

1539 os.remove(configFile1) 

1540 self.tmpConfigFile = configFile2 

1541 

1542 def testFileLocations(self): 

1543 self.assertNotEqual(self.dir1, self.dir2) 

1544 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1545 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1546 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1547 

1548 

1549class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1550 """Test that a config file created by makeRepo outside of repo works.""" 

1551 

1552 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1553 

1554 def setUp(self): 

1555 self.root = makeTestTempDir(TESTDIR) 

1556 self.root2 = makeTestTempDir(TESTDIR) 

1557 

1558 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1559 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1560 

1561 def tearDown(self): 

1562 if os.path.exists(self.root2): 

1563 shutil.rmtree(self.root2, ignore_errors=True) 

1564 super().tearDown() 

1565 

1566 def testConfigExistence(self): 

1567 c = Config(self.tmpConfigFile) 

1568 uri_config = ResourcePath(c["root"]) 

1569 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1570 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1571 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1572 

1573 def testPutGet(self): 

1574 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1575 self.runPutGetTest(storageClass, "test_metric") 

1576 

1577 

1578class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1579 """Test that a config file created by makeRepo outside of repo works.""" 

1580 

1581 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1582 

1583 def setUp(self): 

1584 self.root = makeTestTempDir(TESTDIR) 

1585 self.root2 = makeTestTempDir(TESTDIR) 

1586 

1587 self.tmpConfigFile = self.root2 

1588 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1589 

1590 def testConfigExistence(self): 

1591 # Append the yaml file, else the Config constructor does not know 

1592 # the file type. 

1593 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1594 super().testConfigExistence() 

1595 

1596 

1597class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1598 """Test that a config file created by makeRepo outside of repo works.""" 

1599 

1600 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1601 

1602 def setUp(self): 

1603 self.root = makeTestTempDir(TESTDIR) 

1604 self.root2 = makeTestTempDir(TESTDIR) 

1605 

1606 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1607 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1608 

1609 

1610@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1611class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1612 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1613 a local in-memory SqlRegistry. 

1614 """ 

1615 

1616 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1617 fullConfigKey = None 

1618 validationCanFail = True 

1619 

1620 bucketName = "anybucketname" 

1621 """Name of the Bucket that will be used in the tests. The name is read from 

1622 the config file used with the tests during set-up. 

1623 """ 

1624 

1625 root = "butlerRoot/" 

1626 """Root repository directory expected to be used in case useTempRoot=False. 

1627 Otherwise the root is set to a randomly generated 20-character string 

1628 during set-up. 

1629 """ 

1630 

1631 datastoreStr = [f"datastore={root}"] 

1632 """Contains all expected root locations in a format expected to be 

1633 returned by Butler stringification. 

1634 """ 

1635 

1636 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1637 """The expected format of the S3 Datastore string.""" 

1638 

1639 registryStr = "/gen3.sqlite3" 

1640 """Expected format of the Registry string.""" 

1641 

1642 mock_s3 = mock_s3() 

1643 """The mocked s3 interface from moto.""" 

1644 

1645 def genRoot(self): 

1646 """Returns a random string of len 20 to serve as a root 

1647 name for the temporary bucket repo. 

1648 

1649 This is equivalent to tempfile.mkdtemp as this is what self.root 

1650 becomes when useTempRoot is True. 

1651 """ 

1652 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1653 return rndstr + "/" 

1654 

1655 def setUp(self): 

1656 config = Config(self.configFile) 

1657 uri = ResourcePath(config[".datastore.datastore.root"]) 

1658 self.bucketName = uri.netloc 

1659 

1660 # Enable S3 mocking of tests. 

1661 self.mock_s3.start() 

1662 

1663 # set up some fake credentials if they do not exist 

1664 self.usingDummyCredentials = setAwsEnvCredentials() 

1665 

1666 if self.useTempRoot: 

1667 self.root = self.genRoot() 

1668 rooturi = f"s3://{self.bucketName}/{self.root}" 

1669 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1670 

1671 # need local folder to store registry database 

1672 self.reg_dir = makeTestTempDir(TESTDIR) 

1673 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1674 

1675 # Moto needs to know that we expect the bucket to exist 

1676 # (the name used to be the class attribute bucketName). 

1677 s3 = boto3.resource("s3") 

1678 s3.create_bucket(Bucket=self.bucketName) 

1679 

1680 self.datastoreStr = f"datastore={self.root}" 

1681 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1682 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1683 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1684 

1685 def tearDown(self): 

1686 s3 = boto3.resource("s3") 

1687 bucket = s3.Bucket(self.bucketName) 

1688 try: 

1689 bucket.objects.all().delete() 

1690 except botocore.exceptions.ClientError as e: 

1691 if e.response["Error"]["Code"] == "404": 

1692 # the key was not reachable - pass 

1693 pass 

1694 else: 

1695 raise 

1696 

1697 bucket = s3.Bucket(self.bucketName) 

1698 bucket.delete() 

1699 

1700 # Stop the S3 mock. 

1701 self.mock_s3.stop() 

1702 

1703 # unset any potentially set dummy credentials 

1704 if self.usingDummyCredentials: 

1705 unsetAwsEnvCredentials() 

1706 

1707 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1708 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1709 

1710 if self.useTempRoot and os.path.exists(self.root): 

1711 shutil.rmtree(self.root, ignore_errors=True) 

1712 

1713 

1714@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!") 

1715class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1716 """WebdavDatastore specialization of a butler; a Webdav storage Datastore + 

1717 a local in-memory SqlRegistry. 

1718 """ 

1719 

1720 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml") 

1721 fullConfigKey = None 

1722 validationCanFail = True 

1723 

1724 serverName = "localhost" 

1725 """Name of the server that will be used in the tests. 

1726 """ 

1727 

1728 portNumber = 8080 

1729 """Port on which the webdav server listens. Automatically chosen 

1730 in setUpClass via the _getfreeport() method. 

1731 """ 

1732 

1733 root = "butlerRoot/" 

1734 """Root repository directory expected to be used in case useTempRoot=False. 

1735 Otherwise the root is set to a randomly generated 20-character string 

1736 during set-up. 

1737 """ 

1738 

1739 datastoreStr = [f"datastore={root}"] 

1740 """Contains all expected root locations in a format expected to be 

1741 returned by Butler stringification. 

1742 """ 

1743 

1744 datastoreName = ["FileDatastore@https://{serverName}/{root}"] 

1745 """The expected format of the WebdavDatastore string.""" 

1746 

1747 registryStr = "/gen3.sqlite3" 

1748 """Expected format of the Registry string.""" 

1749 

1750 serverThread = None 

1751 """Thread in which the local webdav server will run""" 

1752 

1753 stopWebdavServer = False 

1754 """This flag will cause the webdav server to 

1755 gracefully shut down when True 

1756 """ 

1757 

1758 def genRoot(self): 

1759 """Returns a random string of len 20 to serve as a root 

1760 name for the temporary bucket repo. 

1761 

1762 This is equivalent to tempfile.mkdtemp as this is what self.root 

1763 becomes when useTempRoot is True. 

1764 """ 

1765 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1766 return rndstr + "/" 

1767 

1768 @classmethod 

1769 def setUpClass(cls): 

1770 # Do the same as the inherited class. 

1771 cls.storageClassFactory = StorageClassFactory() 

1772 cls.storageClassFactory.addFromConfig(cls.configFile) 

1773 

1774 cls.portNumber = cls._getfreeport() 

1775 # Run a local webdav server on which tests will be run 

1776 cls.serverThread = Thread( 

1777 target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True 

1778 ) 

1779 cls.serverThread.start() 

1780 # Wait for it to start 

1781 time.sleep(3) 
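 # A fixed sleep can be flaky on slow machines. A minimal sketch of a
 # polling alternative (not part of this class), assuming the server
 # accepts TCP connections once it is ready:
 #
 #     deadline = time.monotonic() + 10.0
 #     while time.monotonic() < deadline:
 #         try:
 #             socket.create_connection(("localhost", cls.portNumber), 1).close()
 #             break
 #         except OSError:
 #             time.sleep(0.1)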

1782 

1783 @classmethod 

1784 def tearDownClass(cls): 

1785 # Ask for graceful shut down of the webdav server 

1786 cls.stopWebdavServer = True 

1787 # Wait for the thread to exit 

1788 cls.serverThread.join() 

1789 

1790 def setUp(self): 

1791 config = Config(self.configFile) 

1792 

1793 if self.useTempRoot: 

1794 self.root = self.genRoot() 

1795 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}" 

1796 config.update({"datastore": {"datastore": {"root": self.rooturi}}}) 

1797 

1798 # need local folder to store registry database 

1799 self.reg_dir = makeTestTempDir(TESTDIR) 

1800 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1801 

1802 self.datastoreStr = f"datastore={self.root}" 

1803 self.datastoreName = [f"FileDatastore@{self.rooturi}"] 

1804 

1805 if not _is_webdav_endpoint(self.rooturi): 

1806 raise OSError("Webdav server not running properly: cannot run tests.") 

1807 

1808 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False) 

1809 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml") 

1810 

1811 def tearDown(self): 

1812 # Clear temporary directory 

1813 ResourcePath(self.rooturi).remove() 

1814 ResourcePath(self.rooturi).session.close() 

1815 

1816 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1817 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1818 

1819 if self.useTempRoot and os.path.exists(self.root): 

1820 shutil.rmtree(self.root, ignore_errors=True) 

1821 

1822 def _serveWebdav(self, port: int, stopWebdavServer): 

1823 """Starts a local webdav-compatible HTTP server, 

1824 Listening on http://localhost:port 

1825 This server only runs when this test class is instantiated, 

1826 and then shuts down. Must be started is a separate thread. 

1827 

1828 Parameters 

1829 ---------- 

1830 port : `int` 

1831 The port number on which the server should listen 

1832 """ 

1833 root_path = gettempdir() 

1834 

1835 config = { 

1836 "host": "0.0.0.0", 

1837 "port": port, 

1838 "provider_mapping": {"/": root_path}, 

1839 "http_authenticator": {"domain_controller": None}, 

1840 "simple_dc": {"user_mapping": {"*": True}}, 

1841 "verbose": 0, 

1842 } 
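 # In this wsgidav configuration, provider_mapping serves root_path at "/"
 # and the simple_dc user_mapping of {"*": True} allows anonymous access.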

1843 app = WsgiDAVApp(config) 

1844 

1845 server_args = { 

1846 "bind_addr": (config["host"], config["port"]), 

1847 "wsgi_app": app, 

1848 } 

1849 server = wsgi.Server(**server_args) 

1850 server.prepare() 

1851 

1852 try: 

1853 # Start the actual server in a separate thread 

1854 t = Thread(target=server.serve, daemon=True) 

1855 t.start() 

1856 # watch stopWebdavServer, and gracefully 

1857 # shut down the server when True 

1858 while True: 

1859 if stopWebdavServer(): 

1860 break 

1861 time.sleep(1) 

1862 except KeyboardInterrupt: 

1863 print("Caught Ctrl-C, shutting down...") 

1864 finally: 

1865 server.stop() 

1866 t.join() 

1867 

1868 def _getfreeport(): 

1869 """ 

1870 Determine a free port using sockets. 

1871 """ 

1872 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

1873 free_socket.bind(("0.0.0.0", 0)) 

1874 free_socket.listen() 

1875 port = free_socket.getsockname()[1] 

1876 free_socket.close() 

1877 return port 
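 # Note: this is inherently racy; another process could claim the port
 # between close() here and the server's own bind(). The webdav server is
 # started promptly in setUpClass, which keeps that window small.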

1878 

1879 

1880class PosixDatastoreTransfers(unittest.TestCase): 

1881 """Test data transfers between butlers. 

1882 

1883 Test for different managers. UUID to UUID and integer to integer are 

1884 tested. UUID to integer is not supported since we do not currently 

1885 want to allow that. Integer to UUID is supported with the caveat 

1886 that UUID4 will be generated and this will be incorrect for raw 

1887 dataset types. The test ignores that. 

1888 """ 

1889 

1890 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1891 

1892 @classmethod 

1893 def setUpClass(cls): 

1894 cls.storageClassFactory = StorageClassFactory() 

1895 cls.storageClassFactory.addFromConfig(cls.configFile) 

1896 

1897 def setUp(self): 

1898 self.root = makeTestTempDir(TESTDIR) 

1899 self.config = Config(self.configFile) 

1900 

1901 def tearDown(self): 

1902 removeTestTempDir(self.root) 

1903 

1904 def create_butler(self, manager, label): 

1905 config = Config(self.configFile) 

1906 config["registry", "managers", "datasets"] = manager 

1907 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

1908 

1909 def create_butlers(self, manager1, manager2): 

1910 self.source_butler = self.create_butler(manager1, "1") 

1911 self.target_butler = self.create_butler(manager2, "2") 

1912 

1913 def testTransferUuidToUuid(self): 

1914 self.create_butlers( 

1915 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1916 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1917 ) 

1918 # Setting id_gen_map should have no effect here 

1919 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1920 

1921 def testTransferIntToInt(self): 

1922 self.create_butlers( 

1923 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1924 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1925 ) 

1926 # Integer dataset IDs only allow UNIQUE ID generation. 

1927 self.assertButlerTransfers() 

1928 

1929 def testTransferIntToUuid(self): 

1930 self.create_butlers( 

1931 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1932 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1933 ) 

1934 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1935 
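 # For reference, id_gen_map maps a dataset type name to the ID generation
 # mode used when the target registry must mint new UUIDs, e.g. (the
 # dataset type name here is only an illustration):
 #
 #     id_gen_map = {"raw": DatasetIdGenEnum.DATAID_TYPE}
 #
 # DATAID_TYPE derives a reproducible UUID from the dataset type and data
 # ID, whereas the default UNIQUE mode generates a random UUID4.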

1936 def testTransferMissing(self): 

1937 """Test transfers where datastore records are missing. 

1938 

1939 This is how execution butler works. 

1940 """ 

1941 self.create_butlers( 

1942 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1943 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1944 ) 

1945 

1946 # Configure the source butler to allow trust. 

1947 self.source_butler.datastore.trustGetRequest = True 

1948 

1949 self.assertButlerTransfers(purge=True) 

1950 

1951 def testTransferMissingDisassembly(self): 

1952 """Test transfers where datastore records are missing. 

1953 

1954 This is how execution butler works. 

1955 """ 

1956 self.create_butlers( 

1957 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1958 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1959 ) 

1960 

1961 # Configure the source butler to allow trust. 

1962 self.source_butler.datastore.trustGetRequest = True 

1963 

1964 # Test disassembly. 

1965 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

1966 

1967 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

1968 """Test that a run can be transferred to another butler.""" 

1969 

1970 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1971 datasetTypeName = "random_data" 

1972 

1973 # The test will create 3 collections, and we will want to transfer 

1974 # two of those three. 

1975 runs = ["run1", "run2", "other"] 

1976 

1977 # Also want to use two different dataset types to ensure that 

1978 # grouping works. 

1979 datasetTypeNames = ["random_data", "random_data_2"] 

1980 

1981 # Create the run collections in the source butler. 

1982 for run in runs: 

1983 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1984 

1985 # Create dimensions in both butlers (transfer will not create them). 

1986 n_exposures = 30 

1987 for butler in (self.source_butler, self.target_butler): 

1988 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1989 butler.registry.insertDimensionData( 

1990 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1991 ) 

1992 butler.registry.insertDimensionData( 

1993 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

1994 ) 

1995 

1996 for i in range(n_exposures): 

1997 butler.registry.insertDimensionData( 

1998 "exposure", 

1999 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2000 ) 

2001 

2002 # Create dataset types in the source butler. 

2003 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"]) 

2004 for datasetTypeName in datasetTypeNames: 

2005 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2006 self.source_butler.registry.registerDatasetType(datasetType) 

2007 

2008 # Write a dataset to an unrelated run -- this will ensure that 

2009 # we are rewriting integer dataset ids in the target if necessary. 

2010 # Will not be relevant for UUID. 

2011 run = "distraction" 

2012 butler = Butler(butler=self.source_butler, run=run) 

2013 butler.put( 

2014 makeExampleMetrics(), 

2015 datasetTypeName, 

2016 exposure=1, 

2017 instrument="DummyCamComp", 

2018 physical_filter="d-r", 

2019 ) 

2020 

2021 # Write some example metrics to the source 

2022 butler = Butler(butler=self.source_butler) 

2023 

2024 # Set of DatasetRefs that should be in the list of refs to transfer 

2025 # but which will not be transferred. 

2026 deleted = set() 

2027 

2028 n_expected = 20 # Number of datasets expected to be transferred 

2029 source_refs = [] 

2030 for i in range(n_exposures): 

2031 # Put a third of the datasets into each collection; only retain 

2032 # two thirds. 

2033 index = i % 3 

2034 run = runs[index] 

2035 datasetTypeName = datasetTypeNames[i % 2] 

2036 

2037 metric_data = { 

2038 "summary": {"counter": i}, 

2039 "output": {"text": "metric"}, 

2040 "data": [2 * x for x in range(i)], 

2041 } 

2042 metric = MetricsExample(**metric_data) 

2043 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2044 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2045 

2046 # Remove the datastore record using low-level API 

2047 if purge: 

2048 # Remove records for a fraction. 

2049 if index == 1: 

2050 

2051 # For one of these delete the file as well. 

2052 # This allows the "missing" code to filter the 

2053 # file out. 

2054 if not deleted: 

2055 primary, uris = butler.datastore.getURIs(ref) 

2056 if primary: 

2057 primary.remove() 

2058 for uri in uris.values(): 

2059 uri.remove() 

2060 n_expected -= 1 

2061 deleted.add(ref) 

2062 

2063 # Remove the datastore record. 

2064 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

2065 

2066 if index < 2: 

2067 source_refs.append(ref) 

2068 if ref not in deleted: 

2069 new_metric = butler.get(ref.unresolved(), collections=run) 

2070 self.assertEqual(new_metric, metric) 

2071 

2072 # Create some bad dataset types to ensure we check for inconsistent 

2073 # definitions. 

2074 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2075 for datasetTypeName in datasetTypeNames: 

2076 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2077 self.target_butler.registry.registerDatasetType(datasetType) 

2078 with self.assertRaises(ConflictingDefinitionError): 

2079 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2080 # And remove the bad definitions. 

2081 for datasetTypeName in datasetTypeNames: 

2082 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2083 

2084 # Transfer without creating dataset types should fail. 

2085 with self.assertRaises(KeyError): 

2086 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2087 

2088 # Now transfer them to the second butler 

2089 with self.assertLogs(level=logging.DEBUG) as cm: 

2090 transferred = self.target_butler.transfer_from( 

2091 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

2092 ) 

2093 self.assertEqual(len(transferred), n_expected) 

2094 log_output = ";".join(cm.output) 

2095 self.assertIn("found in datastore for chunk", log_output) 

2096 self.assertIn("Creating output run", log_output) 

2097 

2098 # Do the transfer twice to ensure that it will do nothing extra. 

2099 # Only do this if purge=True because it does not work for int 

2100 # dataset_id. 

2101 if purge: 

2102 # This should not need to register dataset types. 

2103 transferred = self.target_butler.transfer_from( 

2104 self.source_butler, source_refs, id_gen_map=id_gen_map 

2105 ) 

2106 self.assertEqual(len(transferred), n_expected) 

2107 

2108 # Also do an explicit low-level transfer to trigger some 

2109 # edge cases. 

2110 with self.assertLogs(level=logging.DEBUG) as cm: 

2111 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2112 log_output = ";".join(cm.output) 

2113 self.assertIn("no file artifacts exist", log_output) 

2114 

2115 with self.assertRaises(TypeError): 

2116 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2117 

2118 with self.assertRaises(ValueError): 

2119 self.target_butler.datastore.transfer_from( 

2120 self.source_butler.datastore, source_refs, transfer="split" 

2121 ) 

2122 

2123 # Now try to get the same refs from the new butler. 

2124 for ref in source_refs: 

2125 if ref not in deleted: 

2126 unresolved_ref = ref.unresolved() 

2127 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2128 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2129 self.assertEqual(new_metric, old_metric) 

2130 

2131 # Now prune run2 collection and create instead a CHAINED collection. 

2132 # This should block the transfer. 

2133 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

2134 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2135 with self.assertRaises(CollectionTypeError): 

2136 # Re-importing the run1 datasets can be problematic if they 

2137 # use integer IDs so filter those out. 

2138 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2139 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 

2140 

2141 
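# A minimal end-to-end sketch of the high-level transfer API exercised by
# these tests (repository paths are hypothetical):
#
#     source = Butler("/path/to/source")
#     target = Butler("/path/to/target", writeable=True)
#     refs = list(source.registry.queryDatasets(..., collections="run1"))
#     target.transfer_from(source, refs, register_dataset_types=True)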

2142 if __name__ == "__main__": 

2143 unittest.main()