Coverage for tests/test_butler.py: 16%

1203 statements  

coverage.py v6.4.1, created at 2022-06-23 09:44 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

import gc
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import socket
import string
import tempfile
import time
import unittest
from tempfile import gettempdir
from threading import Thread

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported."""
        return cls


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql
except ImportError:
    testing = None


try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionSearch,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import _is_webdav_endpoint
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
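    """Return a small MetricsExample holding summary, output, and data
    values, used as the dataset payload throughout the tests below.
    """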

    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests of ButlerConfig behavior not covered by other test
    cases."""

    def testSearchPath(self):
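        """Check that config search paths can override the built-in
        defaults.
        """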

        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests against different
    butler configurations."""

    root = None
    default_run = "ingésτ😺"

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
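        """Check that each named component can be read back both through a
        component dataset type get and through a deferred handle, and that
        both match the corresponding attribute of ``reference``.
        """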

        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
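        """Create a butler for ``run``, register ``datasetTypeName`` with
        the given storage class, and insert the instrument/visit dimension
        records needed by the put/get tests.

        Returns the butler and the registered DatasetType.
        """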

        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass, datasetTypeName):
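        """Exercise put/get round trips using a DatasetRef, a dataset type
        name, and a DatasetType, including component retrieval, read
        parameters, and artifact retrieval.

        Returns the butler so that subclasses can run further checks.
        """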

        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # When path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler

    def testDeferredCollectionPassing(self):
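        """Test that a butler constructed without a default run can still
        put and get datasets when run and collections are passed per call.
        """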

        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection is a
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original run collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
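        # The repository index is an ordinary Config mapping labels to
        # butler config URIs, e.g. in YAML form:
        #     label: /path/to/repo/butler.yaml
        #     bad_label: s3://bucket/not_real.yaml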

        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
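        """Test a composite storage class that supports component reads but
        is not disassembled on write.
        """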

        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
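        """Test a composite storage class that is disassembled into
        per-component artifacts on write (except by in-memory datastores).
        """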

        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
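        """Test ingesting external files, both one dataset per file and
        multiple datasets sharing a single file.
        """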

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile, refs=refs, formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy", record_validation_info=False)

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
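        """Test pruning of RUN, TAGGED, and CHAINED collections with
        various combinations of purge and unstore.
        """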

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
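        """Test registration and querying of dataset types, including
        component dataset types, plus configuration validation.
        """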

        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for components, but querying them can return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
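        """Test that a failed transaction rolls back both the dimension
        data and the stored dataset.
        """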

        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
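        """Check that the string form of a butler reports the expected
        datastore and registry details.
        """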

        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # And check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the
        files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
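        """Test that file templates produce the expected artifact paths and
        that a template yielding non-unique filenames is rejected.
        """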

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Export to a temp directory and import back into a new temp
        directory repo. Does not assume a posix datastore.
        """

        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self):
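        """Test Butler.removeRuns with and without unstoring of artifacts."""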

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testPathConstructor(self):
        """Independent test of constructor using PathLike."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object with the butler yaml
        path = pathlib.Path(self.tmpConfigFile)
        butler = Butler(path, writeable=False)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object without the butler yaml
        # (making sure we skip it if the tmp config doesn't end
        # in butler.yaml -- which is the case for a subclass)
        if self.tmpConfigFile.endswith("butler.yaml"):
            path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
            butler = Butler(path, writeable=False)
            self.assertIsInstance(butler, Butler)

1315 def testExportTransferCopy(self): 

1316 """Test local export using all transfer modes""" 

1317 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1318 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1319 # Test that the repo actually has at least one dataset. 

1320 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1321 self.assertGreater(len(datasets), 0) 

1322 uris = [exportButler.getURI(d) for d in datasets] 

1323 datastoreRoot = exportButler.datastore.root 

1324 

1325 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1326 

1327 for path in pathsInStore: 

1328 # Assume local file system 

1329 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1330 

1331 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1332 with safeTestTempDir(TESTDIR) as exportDir: 

1333 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1334 export.saveDatasets(datasets) 

1335 for path in pathsInStore: 

1336 self.assertTrue( 

1337 self.checkFileExists(exportDir, path), 

1338 f"Check that mode {transfer} exported files", 

1339 ) 

1340 

1341 def testPruneDatasets(self): 

1342 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1343 butler = Butler(self.tmpConfigFile, writeable=True) 

1344 # Load registry data with dimensions to hang datasets off of. 

1345 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1346 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1347 # Add some RUN-type collections. 

1348 run1 = "run1" 

1349 butler.registry.registerRun(run1) 

1350 run2 = "run2" 

1351 butler.registry.registerRun(run2) 

1352 # Put some datasets. ref1 and ref2 have the same data ID and are in 

1353 # different runs; ref3 has a different data ID. 

1354 metric = makeExampleMetrics() 

1355 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1356 datasetType = self.addDatasetType( 

1357 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1358 ) 

1359 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1360 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1361 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1362 

1363 # Simple prune. 

1364 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1365 with self.assertRaises(LookupError): 

1366 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1367 

1368 # Put data back. 

1369 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1370 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1371 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1372 

1373 # Check that in normal mode, once the datastore record has been 

1374 # deleted, trashing and emptying the trash will not touch the file. 

1375 uri1 = butler.datastore.getURI(ref1) 

1376 butler.datastore.bridge.moveToTrash([ref1]) # Update the dataset_location table 

1377 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1378 butler.datastore.trash(ref1) 

1379 butler.datastore.emptyTrash() 

1380 self.assertTrue(uri1.exists()) 

1381 uri1.remove() # Clean it up. 

1382 

1383 # Simulate execution butler setup by deleting the datastore 

1384 # record but keeping the file around and trusting. 

1385 butler.datastore.trustGetRequest = True 
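
# With trustGetRequest enabled the datastore will look for file

# artifacts even when it has no datastore record for a ref, which is

# how an execution butler operates.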

1386 uri2 = butler.datastore.getURI(ref2) 

1387 uri3 = butler.datastore.getURI(ref3) 

1388 self.assertTrue(uri2.exists()) 

1389 self.assertTrue(uri3.exists()) 

1390 

1391 # Remove the datastore record. 

1392 butler.datastore.bridge.moveToTrash([ref2]) # Update the dataset_location table 

1393 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1394 self.assertTrue(uri2.exists()) 

1395 butler.datastore.trash([ref2, ref3]) 
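
# ref2 no longer has a datastore record, so in trust mode trash()

# removes its artifact immediately; ref3 still has a record and

# follows the normal trash-then-emptyTrash path.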

1396 # Immediate removal for ref2 file 

1397 self.assertFalse(uri2.exists()) 

1398 # But ref3 has to wait for the empty. 

1399 self.assertTrue(uri3.exists()) 

1400 butler.datastore.emptyTrash() 

1401 self.assertFalse(uri3.exists()) 

1402 

1403 # Clear out the datasets from registry. 

1404 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1405 

1406 def testPytypePutCoercion(self): 

1407 """Test python type coercion on Butler.get and put.""" 

1408 

1409 # Store some data with the normal example storage class. 

1410 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1411 datasetTypeName = "test_metric" 

1412 butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName) 

1413 

1414 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1415 

1416 # Put a dict and this should coerce to a MetricsExample 

1417 test_dict = {"summary": {"a": 1}, "output": {"b": 2}} 

1418 metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424) 

1419 test_metric = butler.getDirect(metric_ref) 

1420 self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample") 

1421 self.assertEqual(test_metric.summary, test_dict["summary"]) 

1422 self.assertEqual(test_metric.output, test_dict["output"]) 

1423 

1424 # Check that the put still works if a DatasetType is given with 

1425 # a definition matching this python type. 

1426 registry_type = butler.registry.getDatasetType(datasetTypeName) 

1427 this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson") 

1428 metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425) 

1429 self.assertEqual(metric2_ref.datasetType, registry_type) 

1430 

1431 # The get will return the type expected by registry. 

1432 test_metric2 = butler.getDirect(metric2_ref) 

1433 self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample") 

1434 

1435 # Make a new DatasetRef with the compatible but different DatasetType. 

1436 # This should now return a dict. 

1437 new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run) 

1438 test_dict2 = butler.getDirect(new_ref) 

1439 self.assertEqual(get_full_type_name(test_dict2), "dict") 

1440 

1441 # Get it again with the wrong dataset type definition using get() 

1442 # rather than getDirect(). This should be consistent with getDirect() 

1443 # behavior and return the type of the DatasetType. 

1444 test_dict3 = butler.get(this_type, dataId=dataId, visit=425) 

1445 self.assertEqual(get_full_type_name(test_dict3), "dict") 

1446 

1447 def testPytypeCoercion(self): 

1448 """Test python type coercion on Butler.get and put.""" 

1449 

1450 # Store some data with the normal example storage class. 

1451 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1452 datasetTypeName = "test_metric" 

1453 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1454 

1455 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1456 metric = butler.get(datasetTypeName, dataId=dataId) 

1457 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1458 

1459 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1460 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1461 

1462 # Now need to hack the registry dataset type definition. 

1463 # There is no API for this. 

1464 manager = butler.registry._managers.datasets 

1465 manager._db.update( 

1466 manager._static.dataset_type, 

1467 {"name": datasetTypeName}, 

1468 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1469 ) 
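
# This low-level update rewrites the storage_class column of the

# dataset_type row directly in the database, bypassing any validation

# a public API would normally perform.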

1470 

1471 # Force reset of dataset type cache 

1472 butler.registry.refresh() 

1473 

1474 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1475 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1476 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1477 

1478 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1479 self.assertNotEqual(type(metric_model), type(metric)) 

1480 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1481 

1482 # Put the model and read it back to show that everything now 

1483 # works as normal. 

1484 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1485 metric_model_new = butler.get(metric_ref) 

1486 self.assertEqual(metric_model_new, metric_model) 

1487 

1488 # Hack the storage class again to something that will fail on the 

1489 # get with no conversion class. 

1490 manager._db.update( 

1491 manager._static.dataset_type, 

1492 {"name": datasetTypeName}, 

1493 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1494 ) 

1495 butler.registry.refresh() 

1496 

1497 with self.assertRaises(ValueError): 

1498 butler.get(datasetTypeName, dataId=dataId) 

1499 

1500 

1501@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1502class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1503 """PosixDatastore specialization of a butler using Postgres""" 

1504 

1505 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1506 fullConfigKey = ".datastore.formatters" 

1507 validationCanFail = True 

1508 datastoreStr = ["/tmp"] 

1509 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1510 registryStr = "PostgreSQL@test" 

1511 

1512 @staticmethod 

1513 def _handler(postgresql): 

1514 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1515 with engine.begin() as connection: 

1516 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 
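
# btree_gist is assumed to be required by the registry's PostgreSQL

# schema, e.g. for exclusion constraints involving timespans.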

1517 

1518 @classmethod 

1519 def setUpClass(cls): 

1520 # Create the postgres test server. 

1521 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1522 cache_initialized_db=True, on_initialized=cls._handler 

1523 ) 

1524 super().setUpClass() 

1525 

1526 @classmethod 

1527 def tearDownClass(cls): 

1528 # Clean up any lingering SQLAlchemy engines/connections 

1529 # so they're closed before we shut down the server. 

1530 gc.collect() 

1531 cls.postgresql.clear_cache() 

1532 super().tearDownClass() 

1533 

1534 def setUp(self): 

1535 self.server = self.postgresql() 

1536 

1537 # Need to add a registry section to the config. 

1538 self._temp_config = False 

1539 config = Config(self.configFile) 

1540 config["registry", "db"] = self.server.url() 
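
# The temporary config below is the standard one with registry.db

# repointed at the per-test server; self.server.url() yields the

# postgresql:// URL chosen by testing.postgresql.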

1541 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1542 config.dump(fh) 

1543 self.configFile = fh.name 

1544 self._temp_config = True 

1545 super().setUp() 

1546 

1547 def tearDown(self): 

1548 self.server.stop() 

1549 if self._temp_config and os.path.exists(self.configFile): 

1550 os.remove(self.configFile) 

1551 super().tearDown() 

1552 

1553 def testMakeRepo(self): 

1554 # The base class test assumes that it is using sqlite and that 

1555 # the config file is acceptable to sqlite. 

1556 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1557 

1558 

1559class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1560 """InMemoryDatastore specialization of a butler""" 

1561 

1562 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1563 fullConfigKey = None 

1564 useTempRoot = False 

1565 validationCanFail = False 

1566 datastoreStr = ["datastore='InMemory"] 

1567 datastoreName = ["InMemoryDatastore@"] 

1568 registryStr = "/gen3.sqlite3" 

1569 

1570 def testIngest(self): 

1571 pass 

1572 

1573 

1574class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1575 """PosixDatastore specialization""" 

1576 

1577 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1578 fullConfigKey = ".datastore.datastores.1.formatters" 

1579 validationCanFail = True 

1580 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1581 datastoreName = [ 

1582 "InMemoryDatastore@", 

1583 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1584 "SecondDatastore", 

1585 ] 

1586 registryStr = "/gen3.sqlite3" 

1587 

1588 

1589class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1590 """Test that a yaml file in one location can refer to a root in another.""" 

1591 

1592 datastoreStr = ["dir1"] 

1593 # Disable the makeRepo test since we are deliberately not using 

1594 # butler.yaml as the config name. 

1595 fullConfigKey = None 

1596 

1597 def setUp(self): 

1598 self.root = makeTestTempDir(TESTDIR) 

1599 

1600 # Make a new repository in one place 

1601 self.dir1 = os.path.join(self.root, "dir1") 

1602 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1603 

1604 # Move the yaml file to a different place and add a "root" 

1605 self.dir2 = os.path.join(self.root, "dir2") 

1606 os.makedirs(self.dir2, exist_ok=True) 

1607 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1608 config = Config(configFile1) 

1609 config["root"] = self.dir1 

1610 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1611 config.dumpToUri(configFile2) 

1612 os.remove(configFile1) 

1613 self.tmpConfigFile = configFile2 

1614 

1615 def testFileLocations(self): 

1616 self.assertNotEqual(self.dir1, self.dir2) 

1617 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1618 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1619 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1620 

1621 

1622class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1623 """Test that a config file created by makeRepo outside of repo works.""" 

1624 

1625 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1626 

1627 def setUp(self): 

1628 self.root = makeTestTempDir(TESTDIR) 

1629 self.root2 = makeTestTempDir(TESTDIR) 

1630 

1631 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1632 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1633 

1634 def tearDown(self): 

1635 if os.path.exists(self.root2): 

1636 shutil.rmtree(self.root2, ignore_errors=True) 

1637 super().tearDown() 

1638 

1639 def testConfigExistence(self): 

1640 c = Config(self.tmpConfigFile) 

1641 uri_config = ResourcePath(c["root"]) 

1642 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1643 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1644 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1645 

1646 def testPutGet(self): 

1647 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1648 self.runPutGetTest(storageClass, "test_metric") 

1649 

1650 

1651class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1652 """Test that a config file created by makeRepo outside of repo works.""" 

1653 

1654 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1655 

1656 def setUp(self): 

1657 self.root = makeTestTempDir(TESTDIR) 

1658 self.root2 = makeTestTempDir(TESTDIR) 

1659 

1660 self.tmpConfigFile = self.root2 

1661 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1662 

1663 def testConfigExistence(self): 

1664 # Append the yaml file else Config constructor does not know the file 

1665 # type. 

1666 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1667 super().testConfigExistence() 

1668 

1669 

1670class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1671 """Test that a config file created by makeRepo outside of repo works.""" 

1672 

1673 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1674 

1675 def setUp(self): 

1676 self.root = makeTestTempDir(TESTDIR) 

1677 self.root2 = makeTestTempDir(TESTDIR) 

1678 

1679 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1680 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1681 

1682 

1683@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1684class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1685 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1686 a local in-memory SqlRegistry. 

1687 """ 

1688 

1689 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1690 fullConfigKey = None 

1691 validationCanFail = True 

1692 

1693 bucketName = "anybucketname" 

1694 """Name of the Bucket that will be used in the tests. The name is read from 

1695 the config file used with the tests during set-up. 

1696 """ 

1697 

1698 root = "butlerRoot/" 

1699 """Root repository directory expected to be used in case useTempRoot=False. 

1700 Otherwise the root is set to a 20 characters long randomly generated string 

1701 during set-up. 

1702 """ 

1703 

1704 datastoreStr = [f"datastore={root}"] 

1705 """Contains all expected root locations in a format expected to be 

1706 returned by Butler stringification. 

1707 """ 

1708 

1709 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1710 """The expected format of the S3 Datastore string.""" 

1711 

1712 registryStr = "/gen3.sqlite3" 

1713 """Expected format of the Registry string.""" 

1714 

1715 mock_s3 = mock_s3() 

1716 """The mocked s3 interface from moto.""" 

1717 

1718 def genRoot(self): 

1719 """Returns a random string of len 20 to serve as a root 

1720 name for the temporary bucket repo. 

1721 

1722 This is equivalent to tempfile.mkdtemp as this is what self.root 

1723 becomes when useTempRoot is True. 

1724 """ 

1725 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1726 return rndstr + "/" 

1727 

1728 def setUp(self): 

1729 config = Config(self.configFile) 

1730 uri = ResourcePath(config[".datastore.datastore.root"]) 

1731 self.bucketName = uri.netloc 

1732 

1733 # Enable S3 mocking of tests. 

1734 self.mock_s3.start() 

1735 

1736 # set up some fake credentials if they do not exist 

1737 self.usingDummyCredentials = setAwsEnvCredentials() 
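
# setAwsEnvCredentials returns True only when it had to inject dummy

# credentials, so tearDown knows whether to unset them again.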

1738 

1739 if self.useTempRoot: 

1740 self.root = self.genRoot() 

1741 rooturi = f"s3://{self.bucketName}/{self.root}" 

1742 config.update({"datastore": {"datastore": {"root": rooturi}}}) 
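
# The doubled "datastore" key mirrors the layout read above via

# ".datastore.datastore.root": the outer section is the datastore

# config block and the inner key is its root URI.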

1743 

1744 # Need a local folder to store the registry database. 

1745 self.reg_dir = makeTestTempDir(TESTDIR) 

1746 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1747 

1748 # moto needs to know that we expect the bucket to exist 

1749 # (this used to be the class attribute bucketName) 

1750 s3 = boto3.resource("s3") 

1751 s3.create_bucket(Bucket=self.bucketName) 

1752 

1753 self.datastoreStr = f"datastore={self.root}" 

1754 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1755 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1756 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1757 

1758 def tearDown(self): 

1759 s3 = boto3.resource("s3") 

1760 bucket = s3.Bucket(self.bucketName) 

1761 try: 

1762 bucket.objects.all().delete() 

1763 except botocore.exceptions.ClientError as e: 

1764 if e.response["Error"]["Code"] == "404": 

1765 # the key was not reachable - pass 

1766 pass 

1767 else: 

1768 raise 

1769 

1770 bucket = s3.Bucket(self.bucketName) 

1771 bucket.delete() 

1772 

1773 # Stop the S3 mock. 

1774 self.mock_s3.stop() 

1775 

1776 # unset any potentially set dummy credentials 

1777 if self.usingDummyCredentials: 

1778 unsetAwsEnvCredentials() 

1779 

1780 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1781 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1782 

1783 if self.useTempRoot and os.path.exists(self.root): 

1784 shutil.rmtree(self.root, ignore_errors=True) 

1785 

1786 super().tearDown() 

1787 

1788 

1789@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!") 

1790class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1791 """WebdavDatastore specialization of a butler; a Webdav storage Datastore + 

1792 a local in-memory SqlRegistry. 

1793 """ 

1794 

1795 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml") 

1796 fullConfigKey = None 

1797 validationCanFail = True 

1798 

1799 serverName = "localhost" 

1800 """Name of the server that will be used in the tests. 

1801 """ 

1802 

1803 portNumber = 8080 

1804 """Port on which the webdav server listens. Automatically chosen 

1805 at setUpClass via the _getfreeport() method 

1806 """ 

1807 

1808 root = "butlerRoot/" 

1809 """Root repository directory expected to be used in case useTempRoot=False. 

1810 Otherwise the root is set to a 20 characters long randomly generated string 

1811 during set-up. 

1812 """ 

1813 

1814 datastoreStr = [f"datastore={root}"] 

1815 """Contains all expected root locations in a format expected to be 

1816 returned by Butler stringification. 

1817 """ 

1818 

1819 datastoreName = ["FileDatastore@https://{serverName}/{root}"] 

1820 """The expected format of the WebdavDatastore string.""" 

1821 

1822 registryStr = "/gen3.sqlite3" 

1823 """Expected format of the Registry string.""" 

1824 

1825 serverThread = None 

1826 """Thread in which the local webdav server will run""" 

1827 

1828 stopWebdavServer = False 

1829 """This flag will cause the webdav server to 

1830 gracefully shut down when True 

1831 """ 

1832 

1833 def genRoot(self): 

1834 """Returns a random string of len 20 to serve as a root 

1835 name for the temporary bucket repo. 

1836 

1837 This is equivalent to tempfile.mkdtemp as this is what self.root 

1838 becomes when useTempRoot is True. 

1839 """ 

1840 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1841 return rndstr + "/" 

1842 

1843 @classmethod 

1844 def setUpClass(cls): 

1845 # Do the same as inherited class 

1846 cls.storageClassFactory = StorageClassFactory() 

1847 cls.storageClassFactory.addFromConfig(cls.configFile) 

1848 

1849 cls.portNumber = cls._getfreeport() 

1850 # Run a local webdav server on which tests will be run 

1851 cls.serverThread = Thread( 

1852 target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True 

1853 ) 

1854 cls.serverThread.start() 

1855 # Wait for it to start 

1856 time.sleep(3) 

1857 

1858 @classmethod 

1859 def tearDownClass(cls): 

1860 # Ask for graceful shut down of the webdav server 

1861 cls.stopWebdavServer = True 

1862 # Wait for the thread to exit 

1863 cls.serverThread.join() 

1864 super().tearDownClass() 

1865 

1866 def setUp(self): 

1867 config = Config(self.configFile) 

1868 

1869 if self.useTempRoot: 

1870 self.root = self.genRoot() 

1871 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}" 

1872 config.update({"datastore": {"datastore": {"root": self.rooturi}}}) 

1873 

1874 # Need a local folder to store the registry database. 

1875 self.reg_dir = makeTestTempDir(TESTDIR) 

1876 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1877 

1878 self.datastoreStr = f"datastore={self.root}" 

1879 self.datastoreName = [f"FileDatastore@{self.rooturi}"] 

1880 

1881 if not _is_webdav_endpoint(self.rooturi): 

1882 raise OSError("Webdav server not running properly: cannot run tests.") 

1883 

1884 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False) 

1885 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml") 

1886 

1887 def tearDown(self): 

1888 # Clear temporary directory 

1889 ResourcePath(self.rooturi).remove() 

1890 ResourcePath(self.rooturi).session.close() 

1891 

1892 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1893 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1894 

1895 if self.useTempRoot and os.path.exists(self.root): 

1896 shutil.rmtree(self.root, ignore_errors=True) 

1897 

1898 super().tearDown() 

1899 

1900 def _serveWebdav(self, port: int, stopWebdavServer): 

1901 """Starts a local webdav-compatible HTTP server, 

1902 Listening on http://localhost:port 

1903 This server only runs when this test class is instantiated, 

1904 and then shuts down. Must be started is a separate thread. 

1905 

1906 Parameters 

1907 ---------- 

1908 port : `int` 

1909 The port number on which the server should listen. 

 stopWebdavServer : `callable` 

 Zero-argument callable that returns `True` when the server should 

 shut down. 

1910 """ 

1911 root_path = gettempdir() 

1912 

1913 config = { 

1914 "host": "0.0.0.0", 

1915 "port": port, 

1916 "provider_mapping": {"/": root_path}, 

1917 "http_authenticator": {"domain_controller": None}, 

1918 "simple_dc": {"user_mapping": {"*": True}}, 

1919 "verbose": 0, 

1920 } 
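
# In this wsgidav config the whole temp directory is served at "/";

# the simple_dc user_mapping of {"*": True} is understood to allow

# anonymous access, and verbose=0 keeps the server quiet.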

1921 app = WsgiDAVApp(config) 

1922 

1923 server_args = { 

1924 "bind_addr": (config["host"], config["port"]), 

1925 "wsgi_app": app, 

1926 } 

1927 server = wsgi.Server(**server_args) 

1928 server.prepare() 

1929 

1930 try: 

1931 # Start the actual server in a separate thread 

1932 t = Thread(target=server.serve, daemon=True) 

1933 t.start() 

1934 # watch stopWebdavServer, and gracefully 

1935 # shut down the server when True 

1936 while True: 

1937 if stopWebdavServer(): 

1938 break 

1939 time.sleep(1) 

1940 except KeyboardInterrupt: 

1941 print("Caught Ctrl-C, shutting down...") 

1942 finally: 

1943 server.stop() 

1944 t.join() 

1945 

 @staticmethod 

1946 def _getfreeport(): 

1947 """ 

1948 Determines a free port using sockets. 

1949 """ 

1950 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

1951 free_socket.bind(("0.0.0.0", 0)) 
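
# Binding to port 0 makes the OS pick a free ephemeral port; it is read

# back via getsockname() and the socket is closed so the webdav server

# can rebind it (a small race window is possible between close and rebind).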

1952 free_socket.listen() 

1953 port = free_socket.getsockname()[1] 

1954 free_socket.close() 

1955 return port 

1956 

1957 

1958class PosixDatastoreTransfers(unittest.TestCase): 

1959 """Test data transfers between butlers. 

1960 

1961 Tests run with different dataset-id managers: UUID to UUID and integer 

1962 to integer are tested. UUID to integer is not supported since we do 

1963 not currently want to allow that. Integer to UUID is supported, with 

1964 the caveat that UUID4 ids will be generated, which would be incorrect 

1965 for raw dataset types; the test ignores that. 

1966 """ 

1967 

1968 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1969 

1970 @classmethod 

1971 def setUpClass(cls): 

1972 cls.storageClassFactory = StorageClassFactory() 

1973 cls.storageClassFactory.addFromConfig(cls.configFile) 

1974 

1975 def setUp(self): 

1976 self.root = makeTestTempDir(TESTDIR) 

1977 self.config = Config(self.configFile) 

1978 

1979 def tearDown(self): 

1980 removeTestTempDir(self.root) 

1981 

1982 def create_butler(self, manager, label): 

1983 config = Config(self.configFile) 

1984 config["registry", "managers", "datasets"] = manager 

1985 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

1986 

1987 def create_butlers(self, manager1, manager2): 

1988 self.source_butler = self.create_butler(manager1, "1") 

1989 self.target_butler = self.create_butler(manager2, "2") 

1990 

1991 def testTransferUuidToUuid(self): 

1992 self.create_butlers( 

1993 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1994 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

1995 ) 

1996 # Setting id_gen_map should have no effect here 

1997 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 
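
# DATAID_TYPE is understood to derive deterministic dataset ids from

# the dataset type and data ID; since the source already uses UUIDs,

# the map should be ignored here.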

1998 

1999 def testTransferIntToInt(self): 

2000 with self.assertWarns(FutureWarning): 

2001 self.create_butlers( 

2002 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2003 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2004 ) 

2005 # int dataset ID only allows UNIQUE 

2006 self.assertButlerTransfers() 

2007 

2008 def testTransferIntToUuid(self): 

2009 with self.assertWarns(FutureWarning): 

2010 self.create_butlers( 

2011 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2012 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2013 ) 

2014 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

2015 

2016 def testTransferMissing(self): 

2017 """Test transfers where datastore records are missing. 

2018 

2019 This is how execution butler works. 

2020 """ 

2021 self.create_butlers( 

2022 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2023 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2024 ) 

2025 

2026 # Configure the source butler to allow trust. 

2027 self.source_butler.datastore.trustGetRequest = True 

2028 

2029 self.assertButlerTransfers(purge=True) 

2030 

2031 def testTransferMissingDisassembly(self): 

2032 """Test transfers where datastore records are missing. 

2033 

2034 This is how execution butler works. 

2035 """ 

2036 self.create_butlers( 

2037 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2038 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2039 ) 

2040 

2041 # Configure the source butler to allow trust. 

2042 self.source_butler.datastore.trustGetRequest = True 

2043 

2044 # Test disassembly. 

2045 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2046 

2047 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

2048 """Test that a run can be transferred to another butler.""" 

2049 

2050 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2051 datasetTypeName = "random_data" 

2052 

2053 # Test will create 3 collections and we will want to transfer 

2054 # two of those three. 

2055 runs = ["run1", "run2", "other"] 

2056 

2057 # Also want to use two different dataset types to ensure that 

2058 # grouping works. 

2059 datasetTypeNames = ["random_data", "random_data_2"] 

2060 

2061 # Create the run collections in the source butler. 

2062 for run in runs: 

2063 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2064 

2065 # Create dimensions in both butlers (transfer will not create them). 

2066 n_exposures = 30 

2067 for butler in (self.source_butler, self.target_butler): 

2068 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2069 butler.registry.insertDimensionData( 

2070 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2071 ) 

2072 butler.registry.insertDimensionData( 

2073 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2074 ) 

2075 

2076 for i in range(n_exposures): 

2077 butler.registry.insertDimensionData( 

2078 "exposure", 

2079 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2080 ) 

2081 

2082 # Create dataset types in the source butler. 

2083 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"]) 

2084 for datasetTypeName in datasetTypeNames: 

2085 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2086 self.source_butler.registry.registerDatasetType(datasetType) 

2087 

2088 # Write a dataset to an unrelated run -- this will ensure that 

2089 # we are rewriting integer dataset ids in the target if necessary. 

2090 # Will not be relevant for UUID. 

2091 run = "distraction" 

2092 butler = Butler(butler=self.source_butler, run=run) 

2093 butler.put( 

2094 makeExampleMetrics(), 

2095 datasetTypeName, 

2096 exposure=1, 

2097 instrument="DummyCamComp", 

2098 physical_filter="d-r", 

2099 ) 

2100 

2101 # Write some example metrics to the source 

2102 butler = Butler(butler=self.source_butler) 

2103 

2104 # Set of DatasetRefs that should be in the list of refs to transfer 

2105 # but which will not be transferred. 

2106 deleted = set() 

2107 

2108 n_expected = 20 # Number of datasets expected to be transferred 

2109 source_refs = [] 

2110 for i in range(n_exposures): 

2111 # Put a third of datasets into each collection, only retain 

2112 # two thirds. 

2113 index = i % 3 

2114 run = runs[index] 

2115 datasetTypeName = datasetTypeNames[i % 2] 

2116 

2117 metric_data = { 

2118 "summary": {"counter": i}, 

2119 "output": {"text": "metric"}, 

2120 "data": [2 * x for x in range(i)], 

2121 } 

2122 metric = MetricsExample(**metric_data) 

2123 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2124 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2125 

2126 # Remove the datastore record using low-level API 

2127 if purge: 

2128 # Remove records for a fraction. 

2129 if index == 1: 

2130 

2131 # For one of these delete the file as well. 

2132 # This allows the "missing" code to filter the 

2133 # file out. 

2134 if not deleted: 

2135 primary, uris = butler.datastore.getURIs(ref) 

2136 if primary: 

2137 primary.remove() 

2138 for uri in uris.values(): 

2139 uri.remove() 

2140 n_expected -= 1 

2141 deleted.add(ref) 

2142 

2143 # Remove the datastore record. 

2144 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

2145 

2146 if index < 2: 

2147 source_refs.append(ref) 

2148 if ref not in deleted: 

2149 new_metric = butler.get(ref.unresolved(), collections=run) 

2150 self.assertEqual(new_metric, metric) 

2151 

2152 # Create some bad dataset types to ensure we check for inconsistent 

2153 # definitions. 

2154 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2155 for datasetTypeName in datasetTypeNames: 

2156 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2157 self.target_butler.registry.registerDatasetType(datasetType) 

2158 with self.assertRaises(ConflictingDefinitionError): 

2159 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2160 # And remove the bad definitions. 

2161 for datasetTypeName in datasetTypeNames: 

2162 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2163 

2164 # Transfer without creating dataset types should fail. 

2165 with self.assertRaises(KeyError): 

2166 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2167 

2168 # Now transfer them to the second butler 

2169 with self.assertLogs(level=logging.DEBUG) as cm: 

2170 transferred = self.target_butler.transfer_from( 

2171 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

2172 ) 

2173 self.assertEqual(len(transferred), n_expected) 

2174 log_output = ";".join(cm.output) 

2175 self.assertIn("found in datastore for chunk", log_output) 

2176 self.assertIn("Creating output run", log_output) 

2177 

2178 # Do the transfer twice to ensure that it will do nothing extra. 

2179 # Only do this if purge=True because it does not work for int 

2180 # dataset_id. 

2181 if purge: 

2182 # This should not need to register dataset types. 

2183 transferred = self.target_butler.transfer_from( 

2184 self.source_butler, source_refs, id_gen_map=id_gen_map 

2185 ) 

2186 self.assertEqual(len(transferred), n_expected) 

2187 

2188 # Also do an explicit low-level transfer to trigger some 

2189 # edge cases. 

2190 with self.assertLogs(level=logging.DEBUG) as cm: 

2191 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2192 log_output = ";".join(cm.output) 

2193 self.assertIn("no file artifacts exist", log_output) 

2194 

2195 with self.assertRaises(TypeError): 

2196 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2197 

2198 with self.assertRaises(ValueError): 

2199 self.target_butler.datastore.transfer_from( 

2200 self.source_butler.datastore, source_refs, transfer="split" 

2201 ) 

2202 

2203 # Now try to get the same refs from the new butler. 

2204 for ref in source_refs: 

2205 if ref not in deleted: 

2206 unresolved_ref = ref.unresolved() 

2207 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2208 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2209 self.assertEqual(new_metric, old_metric) 

2210 

2211 # Now prune run2 collection and create instead a CHAINED collection. 

2212 # This should block the transfer. 

2213 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

2214 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2215 with self.assertRaises(CollectionTypeError): 

2216 # Re-importing the run1 datasets can be problematic if they 

2217 # use integer IDs so filter those out. 

2218 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2219 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 

2220 

2221 

2222if __name__ == "__main__": 

2223 unittest.main()