Coverage for tests/test_butler.py: 16% (1132 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

24 

25import logging 

26import os 

27import pathlib 

28import pickle 

29import posixpath 

30import random 

31import shutil 

32import socket 

33import string 

34import tempfile 

35import time 

36import unittest 

37 

38try: 

39 import boto3 

40 import botocore 

41 from moto import mock_s3 

42except ImportError: 

43 boto3 = None 

44 

45 def mock_s3(cls): 

46 """A no-op decorator in case moto mock_s3 can not be imported.""" 

47 return cls 

48 

49 
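
# This fallback keeps the module importable when the optional moto/boto3
# test dependencies are missing; S3-specific test cases are expected to be
# skipped when ``boto3 is None`` (the skip logic itself lives outside this
# excerpt).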

try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

from tempfile import gettempdir
from threading import Thread

import astropy.time
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionSearch,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import isWebdavEndpoint
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType
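
    # Note for readers: ``create_butler`` above leaves the registry primed
    # with instrument "DummyCamComp", physical_filter "d-r", and visits
    # 423-425, and the returned butler writes to the given run; the put/get
    # tests below all build on that state.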

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest"
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                # Can the artifacts themselves be retrieved?
                if not butler.datastore.isEphemeral:
                    root_uri = ResourcePath(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)

                            # when path is not preserved there should not be
                            # any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know they are
                # empty
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run="ingest")
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)
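
        # A single file can also back multiple datasets: below, both
        # detector refs point at one shared YAML file and rely on
        # MultiDetectorFormatter to read the appropriate detector's data out
        # of it, so the two URIs are expected to compare equal.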

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile, refs=refs, formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy", record_validation_info=False)

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)
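
    # The next test exercises pruneCollection on this structure:
    #   run1 (RUN):       ref1, ref3
    #   run2 (RUN):       ref2 (same data ID and dataset type as ref1)
    #   tag1 (TAGGED):    ref3
    #   chain1 (CHAINED): searches [run1, run2], so ref2 is shadowed by ref1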

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed but is a no-op.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for components, but querying can still return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run="ingest")

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Checks if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(butler.datastore.root, "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(butler.datastore.root, "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a posix datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # put a dataset in each
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testPathConstructor(self):
        """Independent test of constructor using PathLike."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        # And again with a Path object with the butler yaml
        path = pathlib.Path(self.tmpConfigFile)
        butler = Butler(path, writeable=False)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object without the butler yaml
        # (making sure we skip it if the tmp config doesn't end
        # in butler.yaml -- which is the case for a subclass)
        if self.tmpConfigFile.endswith("butler.yaml"):
            path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
            butler = Butler(path, writeable=False)
            self.assertIsInstance(butler, Butler)

    def testExportTransferCopy(self):
        """Test local export using all transfer modes"""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume local file system
            self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(
                        self.checkFileExists(exportDir, path),
                        f"Check that mode {transfer} exported files",
                    )

1314 def testPruneDatasets(self): 

1315 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1316 butler = Butler(self.tmpConfigFile, writeable=True) 

1317 # Load registry data with dimensions to hang datasets off of. 

1318 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1319 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1320 # Add some RUN-type collections. 

1321 run1 = "run1" 

1322 butler.registry.registerRun(run1) 

1323 run2 = "run2" 

1324 butler.registry.registerRun(run2) 

1325 # Put some datasets. ref1 and ref2 have the same data ID, and are in 

1326 # different runs. ref3 has a different data ID. 

1327 metric = makeExampleMetrics() 

1328 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1329 datasetType = self.addDatasetType( 

1330 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1331 ) 

1332 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1333 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1334 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1335 

1336 # Simple prune. 

1337 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1338 with self.assertRaises(LookupError): 

1339 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1340 

1341 # Put data back. 
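# (unresolved() strips the dataset ID and run from each ref so that

# put() can mint fresh datasets with the same dataset type and data ID.)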

1342 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1343 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1344 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1345 

1346 # Check that in normal mode, deleting the record will lead to 

1347 # trash not touching the file. 

1348 uri1 = butler.datastore.getURI(ref1) 

1349 butler.datastore.bridge.moveToTrash([ref1]) # Update the dataset_location table 

1350 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 
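# (_table is the datastore's internal file-records table; deleting the

# row directly bypasses the public API so that the artifact is left

# with no record.)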

1351 butler.datastore.trash(ref1) 

1352 butler.datastore.emptyTrash() 

1353 self.assertTrue(uri1.exists()) 

1354 uri1.remove() # Clean it up. 

1355 

1356 # Simulate execution butler setup by deleting the datastore 

1357 # record but keeping the file around and trusting. 

1358 butler.datastore.trustGetRequest = True 

1359 uri2 = butler.datastore.getURI(ref2) 

1360 uri3 = butler.datastore.getURI(ref3) 

1361 self.assertTrue(uri2.exists()) 

1362 self.assertTrue(uri3.exists()) 

1363 

1364 # Remove the datastore record. 

1365 butler.datastore.bridge.moveToTrash([ref2]) # Update the dataset_location table 

1366 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1367 self.assertTrue(uri2.exists()) 

1368 butler.datastore.trash([ref2, ref3]) 

1369 # Immediate removal for ref2 file 

1370 self.assertFalse(uri2.exists()) 

1371 # But ref3 has to wait for the empty. 

1372 self.assertTrue(uri3.exists()) 

1373 butler.datastore.emptyTrash() 

1374 self.assertFalse(uri3.exists()) 

1375 

1376 # Clear out the datasets from registry. 

1377 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1378 

1379 def testPytypePutCoercion(self): 

1380 """Test python type coercion on Butler.get and put.""" 

1381 

1382 # Store some data with the normal example storage class. 

1383 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1384 datasetTypeName = "test_metric" 

1385 butler, _ = self.create_butler("ingest", storageClass, datasetTypeName) 

1386 

1387 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1388 

1389 # Put a dict and this should coerce to a MetricsExample 

1390 test_dict = {"summary": {"a": 1}, "output": {"b": 2}} 

1391 metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424) 

1392 test_metric = butler.getDirect(metric_ref) 

1393 self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample") 

1394 self.assertEqual(test_metric.summary, test_dict["summary"]) 

1395 self.assertEqual(test_metric.output, test_dict["output"]) 

1396 

1397 # Check that the put still works if a DatasetType is given with 

1398 # a definition matching this python type. 

1399 registry_type = butler.registry.getDatasetType(datasetTypeName) 

1400 this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson") 

1401 metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425) 

1402 self.assertEqual(metric2_ref.datasetType, registry_type) 

1403 

1404 # The get will return the type expected by registry. 

1405 test_metric2 = butler.getDirect(metric2_ref) 

1406 self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample") 

1407 

1408 # Make a new DatasetRef with the compatible but different DatasetType. 

1409 # This should now return a dict. 

1410 new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run) 

1411 test_dict2 = butler.getDirect(new_ref) 

1412 self.assertEqual(get_full_type_name(test_dict2), "dict") 

1413 

1414 # Get it again with the wrong dataset type definition using get() 

1415 # rather than getDirect(). This should be consistent with getDirect() 

1416 # behavior and return the type of the DatasetType. 

1417 test_dict3 = butler.get(this_type, dataId=dataId, visit=425) 

1418 self.assertEqual(get_full_type_name(test_dict3), "dict") 

1419 

1420 def testPytypeCoercion(self): 

1421 """Test python type coercion on Butler.get and put.""" 

1422 

1423 # Store some data with the normal example storage class. 

1424 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1425 datasetTypeName = "test_metric" 

1426 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1427 

1428 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1429 metric = butler.get(datasetTypeName, dataId=dataId) 

1430 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1431 

1432 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1433 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1434 

1435 # Now need to hack the registry dataset type definition. 

1436 # There is no API for this. 
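# (Database.update treats its second argument as a mapping from column

# name to the key in the row dict that supplies the value to match,

# which is why the row below carries the dataset type name under its

# own name as a key.)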

1437 manager = butler.registry._managers.datasets 

1438 manager._db.update( 

1439 manager._static.dataset_type, 

1440 {"name": datasetTypeName}, 

1441 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1442 ) 

1443 

1444 # Force reset of dataset type cache 

1445 butler.registry.refresh() 

1446 

1447 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1448 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1449 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1450 

1451 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1452 self.assertNotEqual(type(metric_model), type(metric)) 

1453 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1454 

1455 # Put the model and read it back to show that everything now 

1456 # works as normal. 

1457 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1458 metric_model_new = butler.get(metric_ref) 

1459 self.assertEqual(metric_model_new, metric_model) 

1460 

1461 # Hack the storage class again to something that will make the get 

1462 # fail because no conversion is possible. 

1463 manager._db.update( 

1464 manager._static.dataset_type, 

1465 {"name": datasetTypeName}, 

1466 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1467 ) 

1468 butler.registry.refresh() 

1469 

1470 with self.assertRaises(ValueError): 

1471 butler.get(datasetTypeName, dataId=dataId) 

1472 

1473 

1474class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1475 """InMemoryDatastore specialization of a butler""" 

1476 

1477 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1478 fullConfigKey = None 

1479 useTempRoot = False 

1480 validationCanFail = False 

1481 datastoreStr = ["datastore='InMemory"] 

1482 datastoreName = ["InMemoryDatastore@"] 

1483 registryStr = "/gen3.sqlite3" 

1484 

1485 def testIngest(self): 
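# Ingest is file-based, so it does not apply to an in-memory datastore.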

1486 pass 

1487 

1488 

1489class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1490 """PosixDatastore specialization""" 

1491 

1492 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1493 fullConfigKey = ".datastore.datastores.1.formatters" 

1494 validationCanFail = True 

1495 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1496 datastoreName = [ 

1497 "InMemoryDatastore@", 

1498 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1499 "SecondDatastore", 

1500 ] 

1501 registryStr = "/gen3.sqlite3" 

1502 

1503 

1504class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1505 """Test that a yaml file in one location can refer to a root in another.""" 

1506 

1507 datastoreStr = ["dir1"] 

1508 # Disable the makeRepo test since we are deliberately not using 

1509 # butler.yaml as the config name. 

1510 fullConfigKey = None 

1511 

1512 def setUp(self): 

1513 self.root = makeTestTempDir(TESTDIR) 

1514 

1515 # Make a new repository in one place 

1516 self.dir1 = os.path.join(self.root, "dir1") 

1517 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1518 

1519 # Move the yaml file to a different place and add a "root" 

1520 self.dir2 = os.path.join(self.root, "dir2") 

1521 os.makedirs(self.dir2, exist_ok=True) 

1522 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1523 config = Config(configFile1) 

1524 config["root"] = self.dir1 

1525 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1526 config.dumpToUri(configFile2) 

1527 os.remove(configFile1) 

1528 self.tmpConfigFile = configFile2 

1529 

1530 def testFileLocations(self): 

1531 self.assertNotEqual(self.dir1, self.dir2) 

1532 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1533 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1534 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1535 

1536 

1537class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1538 """Test that a config file created by makeRepo outside of repo works.""" 

1539 

1540 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1541 

1542 def setUp(self): 

1543 self.root = makeTestTempDir(TESTDIR) 

1544 self.root2 = makeTestTempDir(TESTDIR) 

1545 

1546 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1547 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1548 

1549 def tearDown(self): 

1550 if os.path.exists(self.root2): 

1551 shutil.rmtree(self.root2, ignore_errors=True) 

1552 super().tearDown() 

1553 

1554 def testConfigExistence(self): 

1555 c = Config(self.tmpConfigFile) 

1556 uri_config = ResourcePath(c["root"]) 

1557 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1558 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1559 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1560 

1561 def testPutGet(self): 

1562 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1563 self.runPutGetTest(storageClass, "test_metric") 

1564 

1565 

1566class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1567 """Test that a config file created by makeRepo outside of repo works.""" 

1568 

1569 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1570 

1571 def setUp(self): 

1572 self.root = makeTestTempDir(TESTDIR) 

1573 self.root2 = makeTestTempDir(TESTDIR) 

1574 

1575 self.tmpConfigFile = self.root2 

1576 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1577 

1578 def testConfigExistence(self): 

1579 # Append the yaml file, else the Config constructor does not know the 

1580 # file type. 

1581 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1582 super().testConfigExistence() 

1583 

1584 

1585class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1586 """Test that a config file created by makeRepo outside of repo works.""" 

1587 

1588 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1589 

1590 def setUp(self): 

1591 self.root = makeTestTempDir(TESTDIR) 

1592 self.root2 = makeTestTempDir(TESTDIR) 

1593 

1594 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1595 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1596 

1597 

1598@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1599class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1600 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1601 a local SQLite SqlRegistry. 

1602 """ 

1603 

1604 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1605 fullConfigKey = None 

1606 validationCanFail = True 

1607 

1608 bucketName = "anybucketname" 

1609 """Name of the Bucket that will be used in the tests. The name is read from 

1610 the config file used with the tests during set-up. 

1611 """ 

1612 

1613 root = "butlerRoot/" 

1614 """Root repository directory expected to be used in case useTempRoot=False. 

1615 Otherwise the root is set to a randomly generated 20-character string 

1616 during set-up. 

1617 """ 

1618 

1619 datastoreStr = [f"datastore={root}"] 

1620 """Contains all expected root locations in a format expected to be 

1621 returned by Butler stringification. 

1622 """ 

1623 

1624 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1625 """The expected format of the S3 Datastore string.""" 

1626 

1627 registryStr = "/gen3.sqlite3" 

1628 """Expected format of the Registry string.""" 

1629 

1630 mock_s3 = mock_s3() 

1631 """The mocked s3 interface from moto.""" 

1632 

1633 def genRoot(self): 

1634 """Returns a random string of len 20 to serve as a root 

1635 name for the temporary bucket repo. 

1636 

1637 This stands in for tempfile.mkdtemp, since it supplies what self.root 

1638 becomes when useTempRoot is True. 

1639 """ 

1640 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1641 return rndstr + "/" 

1642 

1643 def setUp(self): 

1644 config = Config(self.configFile) 

1645 uri = ResourcePath(config[".datastore.datastore.root"]) 

1646 self.bucketName = uri.netloc 

1647 

1648 # Enable S3 mocking of tests. 

1649 self.mock_s3.start() 

1650 

1651 # set up some fake credentials if they do not exist 

1652 self.usingDummyCredentials = setAwsEnvCredentials() 

1653 

1654 if self.useTempRoot: 

1655 self.root = self.genRoot() 

1656 rooturi = f"s3://{self.bucketName}/{self.root}" 

1657 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1658 

1659 # need local folder to store registry database 

1660 self.reg_dir = makeTestTempDir(TESTDIR) 

1661 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1662 

1663 # MOTO needs to know that we expect the bucket to exist 

1664 # (this used to be the class attribute bucketName) 

1665 s3 = boto3.resource("s3") 

1666 s3.create_bucket(Bucket=self.bucketName) 

1667 

1668 self.datastoreStr = f"datastore={self.root}" 

1669 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1670 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1671 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1672 

1673 def tearDown(self): 

1674 s3 = boto3.resource("s3") 

1675 bucket = s3.Bucket(self.bucketName) 

1676 try: 

1677 bucket.objects.all().delete() 

1678 except botocore.exceptions.ClientError as e: 

1679 if e.response["Error"]["Code"] == "404": 

1680 # the key was not reachable - pass 

1681 pass 

1682 else: 

1683 raise 

1684 

1685 bucket = s3.Bucket(self.bucketName) 

1686 bucket.delete() 

1687 

1688 # Stop the S3 mock. 

1689 self.mock_s3.stop() 

1690 

1691 # unset any potentially set dummy credentials 

1692 if self.usingDummyCredentials: 

1693 unsetAwsEnvCredentials() 

1694 

1695 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1696 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1697 

1698 if self.useTempRoot and os.path.exists(self.root): 

1699 shutil.rmtree(self.root, ignore_errors=True) 

1700 

1701 

1702@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!") 

1703# Mock required environment variables during tests 

1704@unittest.mock.patch.dict( 

1705 os.environ, 

1706 { 

1707 "LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1708 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"), 

1709 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs", 

1710 }, 

1711) 

1712class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1713 """WebdavDatastore specialization of a butler; a Webdav storage Datastore + 

1714 a local SQLite SqlRegistry. 

1715 """ 

1716 

1717 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml") 

1718 fullConfigKey = None 

1719 validationCanFail = True 

1720 

1721 serverName = "localhost" 

1722 """Name of the server that will be used in the tests. 

1723 """ 

1724 

1725 portNumber = 8080 

1726 """Port on which the webdav server listens. Automatically chosen 

1727 at setUpClass via the _getfreeport() method. 

1728 """ 

1729 

1730 root = "butlerRoot/" 

1731 """Root repository directory expected to be used in case useTempRoot=False. 

1732 Otherwise the root is set to a randomly generated 20-character string 

1733 during set-up. 

1734 """ 

1735 

1736 datastoreStr = [f"datastore={root}"] 

1737 """Contains all expected root locations in a format expected to be 

1738 returned by Butler stringification. 

1739 """ 

1740 

1741 datastoreName = ["FileDatastore@https://{serverName}/{root}"] 

1742 """The expected format of the WebdavDatastore string.""" 

1743 

1744 registryStr = "/gen3.sqlite3" 

1745 """Expected format of the Registry string.""" 

1746 

1747 serverThread = None 

1748 """Thread in which the local webdav server will run""" 

1749 

1750 stopWebdavServer = False 

1751 """This flag will cause the webdav server to 

1752 gracefully shut down when True 

1753 """ 

1754 

1755 def genRoot(self): 

1756 """Returns a random string of len 20 to serve as a root 

1757 name for the temporary bucket repo. 

1758 

1759 This stands in for tempfile.mkdtemp, since it supplies what self.root 

1760 becomes when useTempRoot is True. 

1761 """ 

1762 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1763 return rndstr + "/" 

1764 

1765 @classmethod 

1766 def setUpClass(cls): 

1767 # Do the same as inherited class 

1768 cls.storageClassFactory = StorageClassFactory() 

1769 cls.storageClassFactory.addFromConfig(cls.configFile) 

1770 

1771 cls.portNumber = cls._getfreeport() 

1772 # Run a local webdav server on which tests will be run 

1773 cls.serverThread = Thread( 

1774 target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True 

1775 ) 

1776 cls.serverThread.start() 

1777 # Wait for it to start 

1778 time.sleep(3) 
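# (A fixed sleep is a crude readiness check; polling the port until it

# accepts connections would be more robust if this ever proves flaky.)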

1779 

1780 @classmethod 

1781 def tearDownClass(cls): 

1782 # Ask for graceful shut down of the webdav server 

1783 cls.stopWebdavServer = True 

1784 # Wait for the thread to exit 

1785 cls.serverThread.join() 

1786 

1787 # Mock required environment variables during tests 

1788 @unittest.mock.patch.dict( 

1789 os.environ, 

1790 { 

1791 "LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1792 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"), 

1793 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs", 

1794 }, 

1795 ) 

1796 def setUp(self): 

1797 config = Config(self.configFile) 

1798 

1799 if self.useTempRoot: 

1800 self.root = self.genRoot() 

1801 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}" 

1802 config.update({"datastore": {"datastore": {"root": self.rooturi}}}) 

1803 

1804 # need local folder to store registry database 

1805 self.reg_dir = makeTestTempDir(TESTDIR) 

1806 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1807 

1808 self.datastoreStr = f"datastore={self.root}" 

1809 self.datastoreName = [f"FileDatastore@{self.rooturi}"] 

1810 

1811 if not isWebdavEndpoint(self.rooturi): 

1812 raise OSError("Webdav server not running properly: cannot run tests.") 

1813 

1814 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False) 

1815 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml") 

1816 

1817 # Mock required environment variables during tests 

1818 @unittest.mock.patch.dict( 

1819 os.environ, 

1820 { 

1821 "LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1822 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"), 

1823 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs", 

1824 }, 

1825 ) 

1826 def tearDown(self): 

1827 # Clear temporary directory 

1828 ResourcePath(self.rooturi).remove() 

1829 ResourcePath(self.rooturi).session.close() 

1830 

1831 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1832 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1833 

1834 if self.useTempRoot and os.path.exists(self.root): 

1835 shutil.rmtree(self.root, ignore_errors=True) 

1836 

1837 def _serveWebdav(self, port: int, stopWebdavServer): 

1838 """Starts a local webdav-compatible HTTP server, 

1839 Listening on http://localhost:port 

1840 This server only runs when this test class is instantiated, 

1841 and then shuts down. Must be started is a separate thread. 

1842 

1843 Parameters 

1844 ---------- 

1845 port : `int` 

1846 The port number on which the server should listen. 

1847 """ 

1848 root_path = gettempdir() 

1849 

1850 config = { 

1851 "host": "0.0.0.0", 

1852 "port": port, 

1853 "provider_mapping": {"/": root_path}, 

1854 "http_authenticator": {"domain_controller": None}, 

1855 "simple_dc": {"user_mapping": {"*": True}}, 

1856 "verbose": 0, 

1857 } 
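# (provider_mapping serves root_path at the top of the share, and the

# simple_dc user_mapping of {"*": True} permits anonymous access.)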

1858 app = WsgiDAVApp(config) 

1859 

1860 server_args = { 

1861 "bind_addr": (config["host"], config["port"]), 

1862 "wsgi_app": app, 

1863 } 

1864 server = wsgi.Server(**server_args) 

1865 server.prepare() 

1866 

1867 try: 

1868 # Start the actual server in a separate thread 

1869 t = Thread(target=server.serve, daemon=True) 

1870 t.start() 

1871 # watch stopWebdavServer, and gracefully 

1872 # shut down the server when True 

1873 while True: 

1874 if stopWebdavServer(): 

1875 break 

1876 time.sleep(1) 

1877 except KeyboardInterrupt: 

1878 print("Caught Ctrl-C, shutting down...") 

1879 finally: 

1880 server.stop() 

1881 t.join() 

1882 

1883 def _getfreeport(): 

1884 """ 

1885 Determine a free port using sockets. 

1886 """ 

1887 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

1888 free_socket.bind(("0.0.0.0", 0)) 

1889 free_socket.listen() 

1890 port = free_socket.getsockname()[1] 

1891 free_socket.close() 

1892 return port 
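# (_getfreeport is deliberately defined without self; it is called

# through the class in setUpClass. The port is only guaranteed free at

# the moment of the check, which is an acceptable race for a test.)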

1893 

1894 

1895class PosixDatastoreTransfers(unittest.TestCase): 

1896 """Test data transfers between butlers. 

1897 

1898 Test for different managers. UUID to UUID and integer to integer are 

1899 tested. UUID to integer is not supported since we do not currently 

1900 want to allow that. Integer to UUID is supported with the caveat 

1901 that UUID4 will be generated and this will be incorrect for raw 

1902 dataset types. The test ignores that. 

1903 """ 

1904 

1905 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1906 

1907 @classmethod 

1908 def setUpClass(cls): 

1909 cls.storageClassFactory = StorageClassFactory() 

1910 cls.storageClassFactory.addFromConfig(cls.configFile) 

1911 

1912 def setUp(self): 

1913 self.root = makeTestTempDir(TESTDIR) 

1914 self.config = Config(self.configFile) 

1915 

1916 def tearDown(self): 

1917 removeTestTempDir(self.root) 

1918 

1919 def create_butler(self, manager, label): 

1920 config = Config(self.configFile) 

1921 config["registry", "managers", "datasets"] = manager 

1922 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

1923 

1924 def create_butlers(self, manager1, manager2): 

1925 self.source_butler = self.create_butler(manager1, "1") 

1926 self.target_butler = self.create_butler(manager2, "2") 

1927 

1928 def testTransferUuidToUuid(self): 

1929 self.create_butlers( 

1930 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1931 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1932 ) 

1933 # Setting id_gen_map should have no effect here 

1934 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1935 

1936 def testTransferIntToInt(self): 

1937 self.create_butlers( 

1938 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1939 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1940 ) 

1941 # Integer dataset IDs only allow the UNIQUE generation mode. 

1942 self.assertButlerTransfers() 

1943 

1944 def testTransferIntToUuid(self): 

1945 self.create_butlers( 

1946 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1947 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1948 ) 

1949 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 
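# (DATAID_TYPE makes the target mint deterministic UUIDs from the

# dataset type and data ID instead of random UUID4s.)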

1950 

1951 def testTransferMissing(self): 

1952 """Test transfers where datastore records are missing. 

1953 

1954 This is how execution butler works. 

1955 """ 

1956 self.create_butlers( 

1957 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1958 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1959 ) 

1960 

1961 # Configure the source butler to allow trust. 

1962 self.source_butler.datastore.trustGetRequest = True 

1963 

1964 self.assertButlerTransfers(purge=True) 

1965 

1966 def testTransferMissingDisassembly(self): 

1967 """Test transfers where datastore records are missing. 

1968 

1969 This is how execution butler works. 

1970 """ 

1971 self.create_butlers( 

1972 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1973 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1974 ) 

1975 

1976 # Configure the source butler to allow trust. 

1977 self.source_butler.datastore.trustGetRequest = True 

1978 

1979 # Test disassembly. 

1980 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

1981 

1982 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

1983 """Test that a run can be transferred to another butler.""" 

1984 

1985 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1986 datasetTypeName = "random_data" 

1987 

1988 # Test will create 3 collections and we will want to transfer 

1989 # two of those three. 

1990 runs = ["run1", "run2", "other"] 

1991 

1992 # Also want to use two different dataset types to ensure that 

1993 # grouping works. 

1994 datasetTypeNames = ["random_data", "random_data_2"] 

1995 

1996 # Create the run collections in the source butler. 

1997 for run in runs: 

1998 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1999 

2000 # Create dimensions in both butlers (transfer will not create them). 

2001 n_exposures = 30 

2002 for butler in (self.source_butler, self.target_butler): 

2003 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2004 butler.registry.insertDimensionData( 

2005 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2006 ) 

2007 butler.registry.insertDimensionData( 

2008 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2009 ) 

2010 

2011 for i in range(n_exposures): 

2012 butler.registry.insertDimensionData( 

2013 "exposure", 

2014 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2015 ) 

2016 

2017 # Create dataset types in the source butler. 

2018 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"]) 

2019 for datasetTypeName in datasetTypeNames: 

2020 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2021 self.source_butler.registry.registerDatasetType(datasetType) 

2022 

2023 # Write a dataset to an unrelated run -- this will ensure that 

2024 # we are rewriting integer dataset ids in the target if necessary. 

2025 # Will not be relevant for UUID. 

2026 run = "distraction" 

2027 butler = Butler(butler=self.source_butler, run=run) 
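# (Constructing a Butler from an existing one reuses its registry and

# datastore while overriding the default run.)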

2028 butler.put( 

2029 makeExampleMetrics(), 

2030 datasetTypeName, 

2031 exposure=1, 

2032 instrument="DummyCamComp", 

2033 physical_filter="d-r", 

2034 ) 

2035 

2036 # Write some example metrics to the source 

2037 butler = Butler(butler=self.source_butler) 

2038 

2039 # Set of DatasetRefs that should be in the list of refs to transfer 

2040 # but which will not be transferred. 

2041 deleted = set() 

2042 

2043 n_expected = 20 # Number of datasets expected to be transferred 

2044 source_refs = [] 

2045 for i in range(n_exposures): 

2046 # Put a third of the datasets into each collection; keep only 

2047 # two thirds of them for transfer. 

2048 index = i % 3 

2049 run = runs[index] 

2050 datasetTypeName = datasetTypeNames[i % 2] 

2051 

2052 metric_data = { 

2053 "summary": {"counter": i}, 

2054 "output": {"text": "metric"}, 

2055 "data": [2 * x for x in range(i)], 

2056 } 

2057 metric = MetricsExample(**metric_data) 

2058 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2059 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2060 

2061 # Remove the datastore record using the low-level API. 

2062 if purge: 

2063 # Remove records for a fraction. 

2064 if index == 1: 

2065 

2066 # For one of these delete the file as well. 

2067 # This allows the "missing" code to filter the 

2068 # file out. 

2069 if not deleted: 

2070 primary, uris = butler.datastore.getURIs(ref) 

2071 if primary: 

2072 primary.remove() 

2073 for uri in uris.values(): 

2074 uri.remove() 

2075 n_expected -= 1 

2076 deleted.add(ref) 

2077 

2078 # Remove the datastore record. 

2079 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

2080 

2081 if index < 2: 

2082 source_refs.append(ref) 

2083 if ref not in deleted: 

2084 new_metric = butler.get(ref.unresolved(), collections=run) 

2085 self.assertEqual(new_metric, metric) 

2086 

2087 # Create some bad dataset types to ensure we check for inconsistent 

2088 # definitions. 

2089 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2090 for datasetTypeName in datasetTypeNames: 

2091 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2092 self.target_butler.registry.registerDatasetType(datasetType) 

2093 with self.assertRaises(ConflictingDefinitionError): 

2094 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2095 # And remove the bad definitions. 

2096 for datasetTypeName in datasetTypeNames: 

2097 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2098 

2099 # Transfer without creating dataset types should fail. 

2100 with self.assertRaises(KeyError): 

2101 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2102 

2103 # Now transfer them to the second butler 

2104 with self.assertLogs(level=logging.DEBUG) as cm: 

2105 transferred = self.target_butler.transfer_from( 

2106 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

2107 ) 

2108 self.assertEqual(len(transferred), n_expected) 

2109 log_output = ";".join(cm.output) 

2110 self.assertIn("found in datastore for chunk", log_output) 

2111 self.assertIn("Creating output run", log_output) 

2112 

2113 # Do the transfer twice to ensure that it will do nothing extra. 

2114 # Only do this if purge=True because it does not work for int 

2115 # dataset_id. 

2116 if purge: 

2117 # This should not need to register dataset types. 

2118 transferred = self.target_butler.transfer_from( 

2119 self.source_butler, source_refs, id_gen_map=id_gen_map 

2120 ) 

2121 self.assertEqual(len(transferred), n_expected) 

2122 

2123 # Also do an explicit low-level transfer to trigger some 

2124 # edge cases. 

2125 with self.assertLogs(level=logging.DEBUG) as cm: 

2126 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2127 log_output = ";".join(cm.output) 

2128 self.assertIn("no file artifacts exist", log_output) 

2129 

2130 with self.assertRaises(TypeError): 

2131 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2132 

2133 with self.assertRaises(ValueError): 

2134 self.target_butler.datastore.transfer_from( 

2135 self.source_butler.datastore, source_refs, transfer="split" 

2136 ) 

2137 

2138 # Now try to get the same refs from the new butler. 

2139 for ref in source_refs: 

2140 if ref not in deleted: 

2141 unresolved_ref = ref.unresolved() 

2142 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2143 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2144 self.assertEqual(new_metric, old_metric) 

2145 

2146 # Now prune the run2 collection and create a CHAINED collection in its place. 

2147 # This should block the transfer. 

2148 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

2149 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2150 with self.assertRaises(CollectionTypeError): 

2151 # Re-importing the run1 datasets can be problematic if they 

2152 # use integer IDs so filter those out. 

2153 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2154 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 

2155 

2156 

2157if __name__ == "__main__": 

2158 unittest.main()