# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import socket
import string
import tempfile
import time
import unittest
import unittest.mock

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 cannot be imported."""
        return cls


try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

from tempfile import gettempdir
from threading import Thread

import astropy.time
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionSearch,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import ConflictingDefinitionError, MissingCollectionError
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import isWebdavEndpoint
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
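        """Check that each named component of ``datasetRef`` can be
        retrieved, both with a direct ``get`` and through a deferred
        handle, and that it matches the reference object.
        """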
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
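        """Create a butler for the given run and register the dataset type
        and the dimension records needed by the put/get tests.
        """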
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass, datasetTypeName):
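        """Run a put/get round trip, including component retrieval,
        artifact retrieval, and dataset removal, returning the butler so
        that subclasses can perform additional checks.
        """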
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = "ingest"
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # When the path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True, run=this_run)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know the collections
            # are empty.
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler

    def testDeferredCollectionPassing(self):
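        """Test that a butler constructed without a run or collection can
        still put and get datasets when collections are passed explicitly.
        """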
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate a no-op.
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with TypeError.
        with self.assertRaises(TypeError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a TypeError.
        with self.assertRaises(TypeError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(TypeError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run="ingest")
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {"ingest"})

        # Check that some special characters can be included in the run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
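        # The repository index is a simple label -> config-URI mapping,
        # e.g. in its YAML form (values here are illustrative):
        #
        #   label: /path/to/repo/butler.yaml
        #   bad_label: s3://bucket/not_real.yaml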
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
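        """Test put/get of a composite with a storage class that does not
        disassemble the dataset, so a single artifact should be stored.
        """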

        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
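        """Test put/get of a composite with a storage class that
        disassembles the dataset into per-component artifacts (except for
        in-memory datastores, which never disassemble).
        """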
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
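        """Test ingesting external files, both as one file per dataset and
        as a single file containing multiple datasets.
        """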
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile, refs=refs, formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy", record_validation_info=False)

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
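        """Test removal of RUN, TAGGED, and CHAINED collections and the
        effect of the purge and unstore options on the datasets they
        contain.
        """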
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
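        """Test dataset type registration, querying of component dataset
        types, and validation of the butler configuration against them.
        """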
        butler = Butler(self.tmpConfigFile, run="ingest")
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not created
        # for its components, but querying can still return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
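        """Test that raising inside a butler transaction rolls back both
        the registry and the datastore changes.
        """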
        butler = Butler(self.tmpConfigFile, run="ingest")
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(LookupError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories.
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo.
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run="ingest")
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""

        butler = Butler(self.tmpConfigFile, run="ingest")

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId.
            self.assertEqual(ref.dataId["exposure"], i)

            # And check that we can get the dataset back with the same dataId.
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        The test testPutTemplates verifies the actual physical existence of
        the files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
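        """Test that the file templates control where datasets are written
        and that a template producing non-unique filenames is rejected.
        """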
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run="ingest")

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(butler.datastore.root, "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(butler.datastore.root, "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temp directory and an import back
        into a new temp directory repo. It does not assume a POSIX datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler.
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run="ingest")
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self):
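        """Test that Butler.removeRuns removes the run collections and
        their datasets from the registry and, only when unstore is True,
        deletes the stored files.
        """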

1217 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1218 butler = Butler(self.tmpConfigFile, writeable=True) 

1219 # Load registry data with dimensions to hang datasets off of. 

1220 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1221 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1222 # Add some RUN-type collection. 

1223 run1 = "run1" 

1224 butler.registry.registerRun(run1) 

1225 run2 = "run2" 

1226 butler.registry.registerRun(run2) 

1227 # put a dataset in each 

1228 metric = makeExampleMetrics() 

1229 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1230 datasetType = self.addDatasetType( 

1231 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1232 ) 

1233 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1234 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1235 uri1 = butler.getURI(ref1, collections=[run1]) 

1236 uri2 = butler.getURI(ref2, collections=[run2]) 

1237 # Remove from both runs with different values for unstore. 

1238 butler.removeRuns([run1], unstore=True) 

1239 butler.removeRuns([run2], unstore=False) 

1240 # Should be nothing in registry for either one, and datastore should 

1241 # not think either exists. 

1242 with self.assertRaises(MissingCollectionError): 

1243 butler.registry.getCollectionType(run1) 

1244 with self.assertRaises(MissingCollectionError): 

1245 butler.registry.getCollectionType(run2) 

1246 self.assertFalse(butler.datastore.exists(ref1)) 

1247 self.assertFalse(butler.datastore.exists(ref2)) 

1248 # The ref we unstored should be gone according to the URI, but the 

1249 # one we forgot should still be around. 

1250 self.assertFalse(uri1.exists()) 

1251 self.assertTrue(uri2.exists()) 

1252 

1253 

1254class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1255 """PosixDatastore specialization of a butler""" 

1256 

1257 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1258 fullConfigKey = ".datastore.formatters" 

1259 validationCanFail = True 

1260 datastoreStr = ["/tmp"] 

1261 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1262 registryStr = "/gen3.sqlite3" 

1263 

1264 def testPathConstructor(self): 

1265 """Independent test of constructor using PathLike.""" 

1266 butler = Butler(self.tmpConfigFile, run="ingest") 

1267 self.assertIsInstance(butler, Butler) 

1268 

1269 # And again with a Path object with the butler yaml 

1270 path = pathlib.Path(self.tmpConfigFile) 

1271 butler = Butler(path, writeable=False) 

1272 self.assertIsInstance(butler, Butler) 

1273 

1274 # And again with a Path object without the butler yaml 

1275 # (making sure we skip it if the tmp config doesn't end 

1276 # in butler.yaml -- which is the case for a subclass) 

1277 if self.tmpConfigFile.endswith("butler.yaml"): 

1278 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1279 butler = Butler(path, writeable=False) 

1280 self.assertIsInstance(butler, Butler) 

1281 

1282 def testExportTransferCopy(self): 

1283 """Test local export using all transfer modes""" 

1284 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1285 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1286 # Test that the repo actually has at least one dataset. 

1287 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1288 self.assertGreater(len(datasets), 0) 

1289 uris = [exportButler.getURI(d) for d in datasets] 

1290 datastoreRoot = exportButler.datastore.root 

1291 

1292 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1293 

1294 for path in pathsInStore: 

1295 # Assume local file system 

1296 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1297 

1298 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1299 with safeTestTempDir(TESTDIR) as exportDir: 

1300 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1301 export.saveDatasets(datasets) 

1302 for path in pathsInStore: 

1303 self.assertTrue( 

1304 self.checkFileExists(exportDir, path), 

1305 f"Check that mode {transfer} exported files", 

1306 ) 
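
# [Editor's sketch, not part of the source file] The export API exercised by
# the loop above, in isolation; `butler`, `refs`, and `exportDir` are assumed
# to exist, and "copy" is one of the transfer modes tested above.
def exportDatasetsSketch(butler, refs, exportDir):
    # Writes a YAML manifest and copies each file artifact into exportDir,
    # preserving its path relative to the datastore root.
    with butler.export(directory=exportDir, format="yaml", transfer="copy") as export:
        export.saveDatasets(refs)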

1307 

1308 def testPruneDatasets(self): 

1309 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1310 butler = Butler(self.tmpConfigFile, writeable=True) 

1311 # Load registry data with dimensions to hang datasets off of. 

1312 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1313 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1314 # Add some RUN-type collections. 

1315 run1 = "run1" 

1316 butler.registry.registerRun(run1) 

1317 run2 = "run2" 

1318 butler.registry.registerRun(run2) 

1319 # Put some datasets. ref1 and ref2 have the same data ID, and are in 

1320 # different runs. ref3 has a different data ID. 

1321 metric = makeExampleMetrics() 

1322 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1323 datasetType = self.addDatasetType( 

1324 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1325 ) 

1326 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1327 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1328 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1329 

1330 # Simple prune. 

1331 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1332 with self.assertRaises(LookupError): 

1333 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1334 

1335 # Put data back. 

1336 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1337 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1338 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1339 

1340 # Check that in normal mode, deleting the record first means that 

1341 # trash will not touch the file. 

1342 uri1 = butler.datastore.getURI(ref1) 

1343 butler.datastore.bridge.moveToTrash([ref1]) # Update the dataset_location table 

1344 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1345 butler.datastore.trash(ref1) 

1346 butler.datastore.emptyTrash() 

1347 self.assertTrue(uri1.exists()) 

1348 uri1.remove() # Clean it up. 

1349 

1350 # Simulate execution butler setup by deleting the datastore 

1351 # record but keeping the file around and trusting. 

1352 butler.datastore.trustGetRequest = True 

1353 uri2 = butler.datastore.getURI(ref2) 

1354 uri3 = butler.datastore.getURI(ref3) 

1355 self.assertTrue(uri2.exists()) 

1356 self.assertTrue(uri3.exists()) 

1357 

1358 # Remove the datastore record. 

1359 butler.datastore.bridge.moveToTrash([ref2]) # Update the dataset_location table 

1360 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1361 self.assertTrue(uri2.exists()) 

1362 butler.datastore.trash([ref2, ref3]) 

1363 # Immediate removal for ref2 file 

1364 self.assertFalse(uri2.exists()) 

1365 # But ref3 has to wait for the empty. 

1366 self.assertTrue(uri3.exists()) 

1367 butler.datastore.emptyTrash() 

1368 self.assertFalse(uri3.exists()) 

1369 

1370 # Clear out the datasets from registry. 

1371 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 
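
# [Editor's sketch, not part of the source file] The two-phase deletion
# protocol the assertions above rely on, in its simplest form: trash() marks
# the dataset, emptyTrash() removes the file artifacts. (With trustGetRequest
# set and the datastore record already gone, trash() removes the file
# immediately, as tested above.)
def deleteDatasetSketch(butler, ref):
    butler.datastore.trash(ref)
    butler.datastore.emptyTrash()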

1372 

1373 def testPytypePutCoercion(self): 

1374 """Test python type coercion on Butler.get and put.""" 

1375 

1376 # Store some data with the normal example storage class. 

1377 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1378 datasetTypeName = "test_metric" 

1379 butler, _ = self.create_butler("ingest", storageClass, datasetTypeName) 

1380 

1381 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1382 

1383 # Put a dict and this should coerce to a MetricsExample 

1384 test_dict = {"summary": {"a": 1}, "output": {"b": 2}} 

1385 metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424) 

1386 test_metric = butler.getDirect(metric_ref) 

1387 self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample") 

1388 self.assertEqual(test_metric.summary, test_dict["summary"]) 

1389 self.assertEqual(test_metric.output, test_dict["output"]) 

1390 

1391 # Check that the put still works if a DatasetType is given with 

1392 # a definition matching this python type. 

1393 registry_type = butler.registry.getDatasetType(datasetTypeName) 

1394 this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson") 

1395 metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425) 

1396 self.assertEqual(metric2_ref.datasetType, registry_type) 

1397 

1398 # The get will return the type expected by registry. 

1399 test_metric2 = butler.getDirect(metric2_ref) 

1400 self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample") 

1401 

1402 # Make a new DatasetRef with the compatible but different DatasetType. 

1403 # This should now return a dict. 

1404 new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run) 

1405 test_dict2 = butler.getDirect(new_ref) 

1406 self.assertEqual(get_full_type_name(test_dict2), "dict") 

1407 

1408 # Get it again with the wrong dataset type definition using get() 

1409 # rather than getDirect(). This should be consistent with getDirect() 

1410 # behavior and return the type of the DatasetType. 

1411 test_dict3 = butler.get(this_type, dataId=dataId, visit=425) 

1412 self.assertEqual(get_full_type_name(test_dict3), "dict") 
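
# [Editor's sketch, not part of the source file] The put-coercion behaviour
# verified above, reduced to its essentials; assumes the "test_metric"
# dataset type whose registry storage class converts dicts to MetricsExample.
def putDictGetMetricSketch(butler, dataId):
    ref = butler.put({"summary": {"a": 1}, "output": {"b": 2}}, "test_metric", dataId=dataId)
    return butler.getDirect(ref)  # comes back as a MetricsExample, not a dict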

1413 

1414 def testPytypeCoercion(self): 

1415 """Test python type coercion on Butler.get and put.""" 

1416 

1417 # Store some data with the normal example storage class. 

1418 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1419 datasetTypeName = "test_metric" 

1420 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1421 

1422 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1423 metric = butler.get(datasetTypeName, dataId=dataId) 

1424 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1425 

1426 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1427 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1428 

1429 # Now need to hack the registry dataset type definition. 

1430 # There is no API for this. 

1431 manager = butler.registry._managers.datasets 

1432 manager._db.update( 

1433 manager._static.dataset_type, 

1434 {"name": datasetTypeName}, 

1435 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1436 ) 

1437 

1438 # Force reset of dataset type cache 

1439 butler.registry.refresh() 

1440 

1441 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1442 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1443 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1444 

1445 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1446 self.assertNotEqual(type(metric_model), type(metric)) 

1447 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1448 

1449 # Put the model and read it back to show that everything now 

1450 # works as normal. 

1451 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1452 metric_model_new = butler.get(metric_ref) 

1453 self.assertEqual(metric_model_new, metric_model) 

1454 

1455 # Hack the storage class again to something that will fail on the 

1456 # get with no conversion class. 

1457 manager._db.update( 

1458 manager._static.dataset_type, 

1459 {"name": datasetTypeName}, 

1460 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1461 ) 

1462 butler.registry.refresh() 

1463 

1464 with self.assertRaises(ValueError): 

1465 butler.get(datasetTypeName, dataId=dataId) 

1466 

1467 

1468class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1469 """InMemoryDatastore specialization of a butler""" 

1470 

1471 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1472 fullConfigKey = None 

1473 useTempRoot = False 

1474 validationCanFail = False 

1475 datastoreStr = ["datastore='InMemory"] 

1476 datastoreName = ["InMemoryDatastore@"] 

1477 registryStr = "/gen3.sqlite3" 

1478 

1479 def testIngest(self): 

1480 pass 
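
# [Editor's note] Deliberately a no-op: the in-memory datastore has no file
# artifacts to ingest, so the inherited ingest test does not apply here.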

1481 

1482 

1483class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1484 """PosixDatastore specialization""" 

1485 

1486 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1487 fullConfigKey = ".datastore.datastores.1.formatters" 

1488 validationCanFail = True 

1489 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1490 datastoreName = [ 

1491 "InMemoryDatastore@", 

1492 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1493 "SecondDatastore", 

1494 ] 

1495 registryStr = "/gen3.sqlite3" 

1496 

1497 

1498class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1499 """Test that a yaml file in one location can refer to a root in another.""" 

1500 

1501 datastoreStr = ["dir1"] 

1502 # Disable the makeRepo test since we are deliberately not using 

1503 # butler.yaml as the config name. 

1504 fullConfigKey = None 

1505 

1506 def setUp(self): 

1507 self.root = makeTestTempDir(TESTDIR) 

1508 

1509 # Make a new repository in one place 

1510 self.dir1 = os.path.join(self.root, "dir1") 

1511 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1512 

1513 # Move the yaml file to a different place and add a "root" 

1514 self.dir2 = os.path.join(self.root, "dir2") 

1515 os.makedirs(self.dir2, exist_ok=True) 

1516 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1517 config = Config(configFile1) 

1518 config["root"] = self.dir1 

1519 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1520 config.dumpToUri(configFile2) 

1521 os.remove(configFile1) 

1522 self.tmpConfigFile = configFile2 
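
# [Editor's sketch, not part of the source file] The relocation idiom this
# setUp exercises, as a stand-alone helper with hypothetical arguments: a
# butler YAML stored outside the repository points back at it via "root".
def relocateConfigSketch(repoDir, outUri):
    cfg = Config(os.path.join(repoDir, "butler.yaml"))
    cfg["root"] = repoDir  # record where the repository actually lives
    cfg.dumpToUri(outUri)  # write the relocated config elsewhere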

1523 

1524 def testFileLocations(self): 

1525 self.assertNotEqual(self.dir1, self.dir2) 

1526 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1527 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1528 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1529 

1530 

1531class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1532 """Test that a config file created by makeRepo outside of repo works.""" 

1533 

1534 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1535 

1536 def setUp(self): 

1537 self.root = makeTestTempDir(TESTDIR) 

1538 self.root2 = makeTestTempDir(TESTDIR) 

1539 

1540 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1541 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1542 

1543 def tearDown(self): 

1544 if os.path.exists(self.root2): 

1545 shutil.rmtree(self.root2, ignore_errors=True) 

1546 super().tearDown() 

1547 

1548 def testConfigExistence(self): 

1549 c = Config(self.tmpConfigFile) 

1550 uri_config = ResourcePath(c["root"]) 

1551 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1552 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1553 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1554 

1555 def testPutGet(self): 

1556 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1557 self.runPutGetTest(storageClass, "test_metric") 

1558 

1559 

1560class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1561 """Test that a config file created by makeRepo outside of repo works.""" 

1562 

1563 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1564 

1565 def setUp(self): 

1566 self.root = makeTestTempDir(TESTDIR) 

1567 self.root2 = makeTestTempDir(TESTDIR) 

1568 

1569 self.tmpConfigFile = self.root2 

1570 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1571 

1572 def testConfigExistence(self): 

1573 # Append the yaml file, else the Config constructor does not know the 

1574 # file type. 

1575 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1576 super().testConfigExistence() 

1577 

1578 

1579class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1580 """Test that a config file created by makeRepo outside of repo works.""" 

1581 

1582 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1583 

1584 def setUp(self): 

1585 self.root = makeTestTempDir(TESTDIR) 

1586 self.root2 = makeTestTempDir(TESTDIR) 

1587 

1588 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1589 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1590 

1591 

1592@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1593class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1594 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1595 a local SQLite-backed SqlRegistry. 

1596 """ 

1597 

1598 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1599 fullConfigKey = None 

1600 validationCanFail = True 

1601 

1602 bucketName = "anybucketname" 

1603 """Name of the Bucket that will be used in the tests. The name is read from 

1604 the config file used with the tests during set-up. 

1605 """ 

1606 

1607 root = "butlerRoot/" 

1608 """Root repository directory expected to be used in case useTempRoot=False. 

1609 Otherwise the root is set to a 20 characters long randomly generated string 

1610 during set-up. 

1611 """ 

1612 

1613 datastoreStr = [f"datastore={root}"] 

1614 """Contains all expected root locations in a format expected to be 

1615 returned by Butler stringification. 

1616 """ 

1617 

1618 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1619 """The expected format of the S3 Datastore string.""" 

1620 

1621 registryStr = "/gen3.sqlite3" 

1622 """Expected format of the Registry string.""" 

1623 

1624 mock_s3 = mock_s3() 

1625 """The mocked s3 interface from moto.""" 

1626 

1627 def genRoot(self): 

1628 """Returns a random string of len 20 to serve as a root 

1629 name for the temporary bucket repo. 

1630 

1631 This is equivalent to tempfile.mkdtemp as this is what self.root 

1632 becomes when useTempRoot is True. 

1633 """ 

1634 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1635 return rndstr + "/" 

1636 

1637 def setUp(self): 

1638 config = Config(self.configFile) 

1639 uri = ResourcePath(config[".datastore.datastore.root"]) 

1640 self.bucketName = uri.netloc 

1641 

1642 # Enable S3 mocking of tests. 

1643 self.mock_s3.start() 

1644 

1645 # set up some fake credentials if they do not exist 

1646 self.usingDummyCredentials = setAwsEnvCredentials() 

1647 

1648 if self.useTempRoot: 

1649 self.root = self.genRoot() 

1650 rooturi = f"s3://{self.bucketName}/{self.root}" 

1651 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1652 

1653 # need local folder to store registry database 

1654 self.reg_dir = makeTestTempDir(TESTDIR) 

1655 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1656 

1657 # Moto needs to know that we expect bucket bucketName to exist 

1658 # (this used to be the class attribute bucketName) 

1659 s3 = boto3.resource("s3") 

1660 s3.create_bucket(Bucket=self.bucketName) 

1661 

1662 self.datastoreStr = f"datastore={self.root}" 

1663 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1664 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1665 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1666 

1667 def tearDown(self): 

1668 s3 = boto3.resource("s3") 

1669 bucket = s3.Bucket(self.bucketName) 

1670 try: 

1671 bucket.objects.all().delete() 

1672 except botocore.exceptions.ClientError as e: 

1673 if e.response["Error"]["Code"] == "404": 

1674 # the key was not reachable - pass 

1675 pass 

1676 else: 

1677 raise 

1678 

1679 bucket = s3.Bucket(self.bucketName) 

1680 bucket.delete() 

1681 

1682 # Stop the S3 mock. 

1683 self.mock_s3.stop() 

1684 

1685 # unset any potentially set dummy credentials 

1686 if self.usingDummyCredentials: 

1687 unsetAwsEnvCredentials() 

1688 

1689 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1690 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1691 

1692 if self.useTempRoot and os.path.exists(self.root): 

1693 shutil.rmtree(self.root, ignore_errors=True) 

1694 

1695 

1696@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!") 

1697# Mock required environment variables during tests 

1698@unittest.mock.patch.dict( 

1699 os.environ, 

1700 { 

1701 "LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1702 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"), 

1703 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs", 

1704 }, 

1705) 

1706class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1707 """WebdavDatastore specialization of a butler; a Webdav storage Datastore + 

1708 a local SQLite-backed SqlRegistry. 

1709 """ 

1710 

1711 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml") 

1712 fullConfigKey = None 

1713 validationCanFail = True 

1714 

1715 serverName = "localhost" 

1716 """Name of the server that will be used in the tests. 

1717 """ 

1718 

1719 portNumber = 8080 

1720 """Port on which the webdav server listens. Automatically chosen 

1721 at setUpClass via the _getfreeport() method. 

1722 """ 

1723 

1724 root = "butlerRoot/" 

1725 """Root repository directory expected to be used in case useTempRoot=False. 

1726 Otherwise the root is set to a 20 characters long randomly generated string 

1727 during set-up. 

1728 """ 

1729 

1730 datastoreStr = [f"datastore={root}"] 

1731 """Contains all expected root locations in a format expected to be 

1732 returned by Butler stringification. 

1733 """ 

1734 

1735 datastoreName = ["FileDatastore@https://{serverName}/{root}"] 

1736 """The expected format of the WebdavDatastore string.""" 

1737 

1738 registryStr = "/gen3.sqlite3" 

1739 """Expected format of the Registry string.""" 

1740 

1741 serverThread = None 

1742 """Thread in which the local webdav server will run""" 

1743 

1744 stopWebdavServer = False 

1745 """This flag will cause the webdav server to 

1746 gracefully shut down when True 

1747 """ 

1748 

1749 def genRoot(self): 

1750 """Returns a random string of len 20 to serve as a root 

1751 name for the temporary bucket repo. 

1752 

1753 This is equivalent to tempfile.mkdtemp as this is what self.root 

1754 becomes when useTempRoot is True. 

1755 """ 

1756 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1757 return rndstr + "/" 

1758 

1759 @classmethod 

1760 def setUpClass(cls): 

1761 # Do the same as inherited class 

1762 cls.storageClassFactory = StorageClassFactory() 

1763 cls.storageClassFactory.addFromConfig(cls.configFile) 

1764 

1765 cls.portNumber = cls._getfreeport() 

1766 # Run a local webdav server on which tests will be run 

1767 cls.serverThread = Thread( 

1768 target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True 

1769 ) 

1770 cls.serverThread.start() 

1771 # Wait for it to start 

1772 time.sleep(3) 

1773 

1774 @classmethod 

1775 def tearDownClass(cls): 

1776 # Ask for graceful shut down of the webdav server 

1777 cls.stopWebdavServer = True 

1778 # Wait for the thread to exit 

1779 cls.serverThread.join() 

1780 

1781 # Mock required environment variables during tests 

1782 @unittest.mock.patch.dict( 

1783 os.environ, 

1784 { 

1785 "LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1786 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"), 

1787 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs", 

1788 }, 

1789 ) 

1790 def setUp(self): 

1791 config = Config(self.configFile) 

1792 

1793 if self.useTempRoot: 

1794 self.root = self.genRoot() 

1795 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}" 

1796 config.update({"datastore": {"datastore": {"root": self.rooturi}}}) 

1797 

1798 # need local folder to store registry database 

1799 self.reg_dir = makeTestTempDir(TESTDIR) 

1800 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1801 

1802 self.datastoreStr = f"datastore={self.root}" 

1803 self.datastoreName = [f"FileDatastore@{self.rooturi}"] 

1804 

1805 if not isWebdavEndpoint(self.rooturi): 

1806 raise OSError("Webdav server not running properly: cannot run tests.") 

1807 

1808 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False) 

1809 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml") 

1810 

1811 # Mock required environment variables during tests 

1812 @unittest.mock.patch.dict( 

1813 os.environ, 

1814 { 

1815 "LSST_BUTLER_WEBDAV_AUTH": "TOKEN", 

1816 "LSST_BUTLER_WEBDAV_TOKEN_FILE": os.path.join(TESTDIR, "config/testConfigs/webdav/token"), 

1817 "LSST_BUTLER_WEBDAV_CA_BUNDLE": "/path/to/ca/certs", 

1818 }, 

1819 ) 

1820 def tearDown(self): 

1821 # Clear temporary directory 

1822 ResourcePath(self.rooturi).remove() 

1823 ResourcePath(self.rooturi).session.close() 

1824 

1825 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1826 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1827 

1828 if self.useTempRoot and os.path.exists(self.root): 

1829 shutil.rmtree(self.root, ignore_errors=True) 

1830 

1831 def _serveWebdav(self, port: int, stopWebdavServer): 

1832 """Starts a local webdav-compatible HTTP server, 

1833 Listening on http://localhost:port 

1834 This server only runs when this test class is instantiated, 

1835 and then shuts down. Must be started is a separate thread. 

1836 

1837 Parameters 

1838 ---------- 

1839 port : `int` 

1840 The port number on which the server should listen. 

1841 """ 

1842 root_path = gettempdir() 

1843 

1844 config = { 

1845 "host": "0.0.0.0", 

1846 "port": port, 

1847 "provider_mapping": {"/": root_path}, 

1848 "http_authenticator": {"domain_controller": None}, 

1849 "simple_dc": {"user_mapping": {"*": True}}, 

1850 "verbose": 0, 

1851 } 

1852 app = WsgiDAVApp(config) 

1853 

1854 server_args = { 

1855 "bind_addr": (config["host"], config["port"]), 

1856 "wsgi_app": app, 

1857 } 

1858 server = wsgi.Server(**server_args) 

1859 server.prepare() 

1860 

1861 try: 

1862 # Start the actual server in a separate thread 

1863 t = Thread(target=server.serve, daemon=True) 

1864 t.start() 

1865 # watch stopWebdavServer, and gracefully 

1866 # shut down the server when True 

1867 while True: 

1868 if stopWebdavServer(): 

1869 break 

1870 time.sleep(1) 

1871 except KeyboardInterrupt: 

1872 print("Caught Ctrl-C, shutting down...") 

1873 finally: 

1874 server.stop() 

1875 t.join() 

1876 

1877 def _getfreeport(): 

1878 """ 

1879 Determines a free port using sockets. 

1880 """ 

1881 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

1882 free_socket.bind(("0.0.0.0", 0)) 

1883 free_socket.listen() 

1884 port = free_socket.getsockname()[1] 

1885 free_socket.close() 

1886 return port 
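
# [Editor's note] The bind-then-close trick above is inherently racy: another
# process could claim the port between close() and the server's own bind.
# That is acceptable for a single-host test setup such as this one.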

1887 

1888 

1889class PosixDatastoreTransfers(unittest.TestCase): 

1890 """Test data transfers between butlers. 

1891 

1892 Tests different manager combinations. UUID to UUID and integer to integer are 

1893 tested. UUID to integer is not supported since we do not currently 

1894 want to allow that. Integer to UUID is supported with the caveat 

1895 that UUID4 will be generated and this will be incorrect for raw 

1896 dataset types. The test ignores that. 

1897 """ 

1898 

1899 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1900 

1901 @classmethod 

1902 def setUpClass(cls): 

1903 cls.storageClassFactory = StorageClassFactory() 

1904 cls.storageClassFactory.addFromConfig(cls.configFile) 

1905 

1906 def setUp(self): 

1907 self.root = makeTestTempDir(TESTDIR) 

1908 self.config = Config(self.configFile) 

1909 

1910 def tearDown(self): 

1911 removeTestTempDir(self.root) 

1912 

1913 def create_butler(self, manager, label): 

1914 config = Config(self.configFile) 

1915 config["registry", "managers", "datasets"] = manager 

1916 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

1917 

1918 def create_butlers(self, manager1, manager2): 

1919 self.source_butler = self.create_butler(manager1, "1") 

1920 self.target_butler = self.create_butler(manager2, "2") 

1921 

1922 def testTransferUuidToUuid(self): 

1923 self.create_butlers( 

1924 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1925 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1926 ) 

1927 # Setting id_gen_map should have no effect here 

1928 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1929 

1930 def testTransferIntToInt(self): 

1931 self.create_butlers( 

1932 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1933 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1934 ) 

1935 # Integer dataset IDs only allow UNIQUE ID generation. 

1936 self.assertButlerTransfers() 

1937 

1938 def testTransferIntToUuid(self): 

1939 self.create_butlers( 

1940 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

1941 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1942 ) 

1943 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

1944 

1945 def testTransferMissing(self): 

1946 """Test transfers where datastore records are missing. 

1947 

1948 This is how execution butler works. 

1949 """ 

1950 self.create_butlers( 

1951 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1952 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1953 ) 

1954 

1955 # Configure the source butler to allow trust. 

1956 self.source_butler.datastore.trustGetRequest = True 

1957 

1958 self.assertButlerTransfers(purge=True) 

1959 

1960 def testTransferMissingDisassembly(self): 

1961 """Test transfers where datastore records are missing. 

1962 

1963 This is how execution butler works. 

1964 """ 

1965 self.create_butlers( 

1966 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1967 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1968 ) 

1969 

1970 # Configure the source butler to allow trust. 

1971 self.source_butler.datastore.trustGetRequest = True 

1972 

1973 # Test disassembly. 

1974 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

1975 

1976 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

1977 """Test that a run can be transferred to another butler.""" 

1978 

1979 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

1980 datasetTypeName = "random_data" 

1981 

1982 # Test will create 3 collections and we will want to transfer 

1983 # two of those three. 

1984 runs = ["run1", "run2", "other"] 

1985 

1986 # Also want to use two different dataset types to ensure that 

1987 # grouping works. 

1988 datasetTypeNames = ["random_data", "random_data_2"] 

1989 

1990 # Create the run collections in the source butler. 

1991 for run in runs: 

1992 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

1993 

1994 # Create dimensions in both butlers (transfer will not create them). 

1995 n_exposures = 30 

1996 for butler in (self.source_butler, self.target_butler): 

1997 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1998 butler.registry.insertDimensionData( 

1999 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2000 ) 

2001 butler.registry.insertDimensionData( 

2002 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2003 ) 

2004 

2005 for i in range(n_exposures): 

2006 butler.registry.insertDimensionData( 

2007 "exposure", 

2008 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2009 ) 

2010 

2011 # Create dataset types in the source butler. 

2012 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"]) 

2013 for datasetTypeName in datasetTypeNames: 

2014 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2015 self.source_butler.registry.registerDatasetType(datasetType) 

2016 

2017 # Write a dataset to an unrelated run -- this will ensure that 

2018 # we are rewriting integer dataset ids in the target if necessary. 

2019 # Will not be relevant for UUID. 

2020 run = "distraction" 

2021 butler = Butler(butler=self.source_butler, run=run) 

2022 butler.put( 

2023 makeExampleMetrics(), 

2024 datasetTypeName, 

2025 exposure=1, 

2026 instrument="DummyCamComp", 

2027 physical_filter="d-r", 

2028 ) 

2029 

2030 # Write some example metrics to the source 

2031 butler = Butler(butler=self.source_butler) 

2032 

2033 # Set of DatasetRefs that should be in the list of refs to transfer 

2034 # but which will not be transferred. 

2035 deleted = set() 

2036 

2037 n_expected = 20 # Number of datasets expected to be transferred 

2038 source_refs = [] 

2039 for i in range(n_exposures): 

2040 # Put a third of the datasets into each collection; only retain 

2041 # two thirds. 

2042 index = i % 3 

2043 run = runs[index] 

2044 datasetTypeName = datasetTypeNames[i % 2] 

2045 

2046 metric_data = { 

2047 "summary": {"counter": i}, 

2048 "output": {"text": "metric"}, 

2049 "data": [2 * x for x in range(i)], 

2050 } 

2051 metric = MetricsExample(**metric_data) 

2052 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2053 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2054 

2055 # Remove the datastore record using low-level API 

2056 if purge: 

2057 # Remove records for a fraction. 

2058 if index == 1: 

2059 

2060 # For one of these delete the file as well. 

2061 # This allows the "missing" code to filter the 

2062 # file out. 

2063 if not deleted: 

2064 primary, uris = butler.datastore.getURIs(ref) 

2065 if primary: 

2066 primary.remove() 

2067 for uri in uris.values(): 

2068 uri.remove() 

2069 n_expected -= 1 

2070 deleted.add(ref) 

2071 

2072 # Remove the datastore record. 

2073 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

2074 

2075 if index < 2: 

2076 source_refs.append(ref) 

2077 if ref not in deleted: 

2078 new_metric = butler.get(ref.unresolved(), collections=run) 

2079 self.assertEqual(new_metric, metric) 

2080 

2081 # Create some bad dataset types to ensure we check for inconsistent 

2082 # definitions. 

2083 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2084 for datasetTypeName in datasetTypeNames: 

2085 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2086 self.target_butler.registry.registerDatasetType(datasetType) 

2087 with self.assertRaises(ConflictingDefinitionError): 

2088 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2089 # And remove the bad definitions. 

2090 for datasetTypeName in datasetTypeNames: 

2091 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2092 

2093 # Transfer without creating dataset types should fail. 

2094 with self.assertRaises(KeyError): 

2095 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2096 

2097 # Now transfer them to the second butler 

2098 with self.assertLogs(level=logging.DEBUG) as cm: 

2099 transferred = self.target_butler.transfer_from( 

2100 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

2101 ) 

2102 self.assertEqual(len(transferred), n_expected) 

2103 log_output = ";".join(cm.output) 

2104 self.assertIn("found in datastore for chunk", log_output) 

2105 self.assertIn("Creating output run", log_output) 

2106 

2107 # Do the transfer twice to ensure that it will do nothing extra. 

2108 # Only do this if purge=True because it does not work for int 

2109 # dataset_id. 

2110 if purge: 

2111 # This should not need to register dataset types. 

2112 transferred = self.target_butler.transfer_from( 

2113 self.source_butler, source_refs, id_gen_map=id_gen_map 

2114 ) 

2115 self.assertEqual(len(transferred), n_expected) 

2116 

2117 # Also do an explicit low-level transfer to trigger some 

2118 # edge cases. 

2119 with self.assertLogs(level=logging.DEBUG) as cm: 

2120 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2121 log_output = ";".join(cm.output) 

2122 self.assertIn("no file artifacts exist", log_output) 

2123 

2124 with self.assertRaises(TypeError): 

2125 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2126 

2127 with self.assertRaises(ValueError): 

2128 self.target_butler.datastore.transfer_from( 

2129 self.source_butler.datastore, source_refs, transfer="split" 

2130 ) 

2131 

2132 # Now try to get the same refs from the new butler. 

2133 for ref in source_refs: 

2134 if ref not in deleted: 

2135 unresolved_ref = ref.unresolved() 

2136 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2137 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2138 self.assertEqual(new_metric, old_metric) 

2139 

2140 # Now prune the run2 collection and create a CHAINED collection instead. 

2141 # This should block the transfer. 

2142 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

2143 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2144 with self.assertRaises(TypeError): 

2145 # Re-importing the run1 datasets can be problematic if they 

2146 # use integer IDs so filter those out. 

2147 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2148 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 

2149 

2150 

2151 if __name__ == "__main__": 

2152 unittest.main()