Coverage for tests/test_butler.py: 14% (1211 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

import gc
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import socket
import string
import tempfile
import time
import unittest
import unittest.mock  # ensure the mock submodule is loaded for patch.dict below
from tempfile import gettempdir
from threading import Thread

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported."""
        return cls
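    # With this fallback in place, classes decorated with @mock_s3 are
    # returned unchanged, so the module still imports without moto; the
    # S3-backed test cases are expected to skip themselves when boto3 is
    # None rather than rely on the decorator.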


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql
except ImportError:
    testing = None


try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionSearch,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import _is_webdav_endpoint
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other
    test cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None
    default_run = "ingésτ😺"
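    # Note: the default run name deliberately mixes accented Latin, Greek,
    # and emoji characters, exercising Unicode handling in collection names
    # throughout these tests.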

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
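        # At this point the repository holds one instrument, one
        # physical_filter, a visit_system, and visits 423-425, with ``run``
        # as the butler's default output collection.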
        return butler, datasetType

    def runPutGetTest(self, storageClass, datasetTypeName):
        # Datasets will be put into per-iteration run collections below,
        # and lookups then name the collection to search explicitly.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # When path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises a
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())
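        # For reference, the repository index written above is just a small
        # mapping of labels to config URIs, e.g. in its YAML form:
        #
        #   label: /path/to/repo/butler.yaml
        #   bad_label: s3://bucket/not_real.yaml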

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

615 

616 def testCompositePutGetVirtual(self): 

617 storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp") 

618 butler = self.runPutGetTest(storageClass, "test_metric_comp") 

619 

620 # Should be disassembled 

621 datasets = list(butler.registry.queryDatasets(..., collections=self.default_run)) 

622 self.assertEqual(len(datasets), 1) 

623 uri, components = butler.getURIs(datasets[0]) 

624 

625 if butler.datastore.isEphemeral: 

626 # Never disassemble in-memory datastore 

627 self.assertIsInstance(uri, ResourcePath) 

628 self.assertFalse(components) 

629 self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}") 

630 self.assertIn("423", str(uri), f"Checking visit is in URI {uri}") 

631 else: 

632 self.assertIsNone(uri) 

633 self.assertEqual(set(components), set(storageClass.components)) 

634 for compuri in components.values(): 

635 self.assertIsInstance(compuri, ResourcePath) 

636 self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}") 

637 self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}") 

638 

639 # Predicted dataset 

640 dataId = {"instrument": "DummyCamComp", "visit": 424} 

641 uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True) 

642 

643 if butler.datastore.isEphemeral: 

644 # Never disassembled 

645 self.assertIsInstance(uri, ResourcePath) 

646 self.assertFalse(components) 

647 self.assertIn("424", str(uri), f"Checking visit is in URI {uri}") 

648 self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}") 

649 else: 

650 self.assertIsNone(uri) 

651 self.assertEqual(set(components), set(storageClass.components)) 

652 for compuri in components.values(): 

653 self.assertIsInstance(compuri, ResourcePath) 

654 self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}") 

655 self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}") 

656 

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile, refs=refs, formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy", record_validation_info=False)

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass
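        # (With record_validation_info=False the datastore stores no size or
        # checksum and so reports file_size == -1 here; datastores lacking
        # getStoredItemsInfo are skipped via the AttributeError handler.)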

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains ref1 and ref3, but not ref2, which is shadowed
        # by ref1 since both have the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying can still return them.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)
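        # (The ":?" suffix in the first template marks visit.namex as
        # optional: the unknown record attribute is dropped with a log
        # message, whereas the same template without "?" raises KeyError.)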

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Export to a temp directory and import back into a new temp
        directory repo. This does not assume a POSIX datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())
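        # (That is: unstore=True deleted run1's artifact from storage, while
        # unstore=False merely forgot run2's dataset in the registry and
        # left its file behind.)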


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"
1310 

1311 def testPathConstructor(self): 

1312 """Independent test of constructor using PathLike.""" 

1313 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1314 self.assertIsInstance(butler, Butler) 

1315 

1316 # And again with a Path object with the butler yaml 

1317 path = pathlib.Path(self.tmpConfigFile) 

1318 butler = Butler(path, writeable=False) 

1319 self.assertIsInstance(butler, Butler) 

1320 

1321 # And again with a Path object without the butler yaml 

1322 # (making sure we skip it if the tmp config doesn't end 

1323 # in butler.yaml -- which is the case for a subclass) 

1324 if self.tmpConfigFile.endswith("butler.yaml"): 

1325 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1326 butler = Butler(path, writeable=False) 

1327 self.assertIsInstance(butler, Butler) 

1328 

1329 def testExportTransferCopy(self): 

1330 """Test local export using all transfer modes""" 

1331 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1332 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1333 # Test that the repo actually has at least one dataset. 

1334 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1335 self.assertGreater(len(datasets), 0) 

1336 uris = [exportButler.getURI(d) for d in datasets] 

1337 datastoreRoot = exportButler.datastore.root 

1338 

1339 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1340 

1341 for path in pathsInStore: 

1342 # Assume local file system 

1343 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1344 

1345 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1346 with safeTestTempDir(TESTDIR) as exportDir: 

1347 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1348 export.saveDatasets(datasets) 

1349 for path in pathsInStore: 

1350 self.assertTrue( 

1351 self.checkFileExists(exportDir, path), 

1352 f"Check that mode {transfer} exported files", 

1353 ) 

1354 
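# Illustrative sketch of a single local export with an explicit transfer
# mode, as looped over above ("exportDir" is a temporary directory; any
# of "copy", "link", "symlink" or "relsymlink" works here):
#
#     with exportButler.export(directory=exportDir, format="yaml",
#                              transfer="copy") as export:
#         export.saveDatasets(datasets)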

1355 def testPruneDatasets(self): 

1356 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1357 butler = Butler(self.tmpConfigFile, writeable=True) 

1358 # Load registry data with dimensions to hang datasets off of. 

1359 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1360 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1361 # Add some RUN-type collections. 

1362 run1 = "run1" 

1363 butler.registry.registerRun(run1) 

1364 run2 = "run2" 

1365 butler.registry.registerRun(run2) 

1366 # Put some datasets. ref1 and ref2 have the same data ID, and are in 

1367 # different runs. ref3 has a different data ID. 

1368 metric = makeExampleMetrics() 

1369 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1370 datasetType = self.addDatasetType( 

1371 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1372 ) 

1373 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1374 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1375 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1376 

1377 # Simple prune. 

1378 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1379 with self.assertRaises(LookupError): 

1380 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1381 

1382 # Put data back. 

1383 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1384 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1385 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1386 

1387 # Check that in normal mode, deleting the record first means that 

1388 # trash will not touch the file. 

1389 uri1 = butler.datastore.getURI(ref1) 

1390 butler.datastore.bridge.moveToTrash([ref1]) # Update the dataset_location table 

1391 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1392 butler.datastore.trash(ref1) 

1393 butler.datastore.emptyTrash() 

1394 self.assertTrue(uri1.exists()) 

1395 uri1.remove() # Clean it up. 

1396 

1397 # Simulate execution butler setup by deleting the datastore 

1398 # record but keeping the file around and trusting. 

1399 butler.datastore.trustGetRequest = True 

1400 uri2 = butler.datastore.getURI(ref2) 

1401 uri3 = butler.datastore.getURI(ref3) 

1402 self.assertTrue(uri2.exists()) 

1403 self.assertTrue(uri3.exists()) 

1404 

1405 # Remove the datastore record. 

1406 butler.datastore.bridge.moveToTrash([ref2]) # Update the dataset_location table 

1407 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1408 self.assertTrue(uri2.exists()) 

1409 butler.datastore.trash([ref2, ref3]) 

1410 # Immediate removal for ref2 file 

1411 self.assertFalse(uri2.exists()) 

1412 # But ref3 has to wait for the empty. 

1413 self.assertTrue(uri3.exists()) 

1414 butler.datastore.emptyTrash() 

1415 self.assertFalse(uri3.exists()) 

1416 

1417 # Clear out the datasets from registry. 

1418 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1419 
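# Illustrative sketch of the two-phase deletion exercised above: with a
# datastore record present, trash() only marks the artifact and
# emptyTrash() performs the actual removal; in trust mode a ref whose
# datastore record is already gone has its file removed by trash()
# immediately (the ref2 case above):
#
#     butler.datastore.trash([ref3])
#     butler.datastore.emptyTrash()   # file for ref3 removed here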

1420 def testPytypePutCoercion(self): 

1421 """Test python type coercion on Butler.get and put.""" 

1422 

1423 # Store some data with the normal example storage class. 

1424 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1425 datasetTypeName = "test_metric" 

1426 butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName) 

1427 

1428 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1429 

1430 # Put a dict; this should coerce to a MetricsExample. 

1431 test_dict = {"summary": {"a": 1}, "output": {"b": 2}} 

1432 metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424) 

1433 test_metric = butler.getDirect(metric_ref) 

1434 self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample") 

1435 self.assertEqual(test_metric.summary, test_dict["summary"]) 

1436 self.assertEqual(test_metric.output, test_dict["output"]) 

1437 

1438 # Check that the put still works if a DatasetType is given with 

1439 # a definition matching this python type. 

1440 registry_type = butler.registry.getDatasetType(datasetTypeName) 

1441 this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson") 

1442 metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425) 

1443 self.assertEqual(metric2_ref.datasetType, registry_type) 

1444 

1445 # The get will return the type expected by registry. 

1446 test_metric2 = butler.getDirect(metric2_ref) 

1447 self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample") 

1448 

1449 # Make a new DatasetRef with the compatible but different DatasetType. 

1450 # This should now return a dict. 

1451 new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run) 

1452 test_dict2 = butler.getDirect(new_ref) 

1453 self.assertEqual(get_full_type_name(test_dict2), "dict") 

1454 

1455 # Get it again with the wrong dataset type definition using get() 

1456 # rather than getDirect(). This should be consistent with getDirect() 

1457 # behavior and return the type of the DatasetType. 

1458 test_dict3 = butler.get(this_type, dataId=dataId, visit=425) 

1459 self.assertEqual(get_full_type_name(test_dict3), "dict") 

1460 
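# Illustrative sketch of the coercion contract checked above: put()
# accepts a dict and coerces it to the registry storage class, while
# get()/getDirect() through a compatible but different DatasetType
# ("this_type" above, backed by a dict) converts the other way:
#
#     ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
#     butler.getDirect(ref)                            # -> MetricsExample
#     butler.get(this_type, dataId=dataId, visit=425)  # -> dict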

1461 def testPytypeCoercion(self): 

1462 """Test python type coercion on Butler.get and put.""" 

1463 

1464 # Store some data with the normal example storage class. 

1465 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1466 datasetTypeName = "test_metric" 

1467 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1468 

1469 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1470 metric = butler.get(datasetTypeName, dataId=dataId) 

1471 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1472 

1473 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1474 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1475 

1476 # Now need to hack the registry dataset type definition. 

1477 # There is no API for this. 

1478 manager = butler.registry._managers.datasets 

1479 manager._db.update( 

1480 manager._static.dataset_type, 

1481 {"name": datasetTypeName}, 

1482 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1483 ) 

1484 

1485 # Force reset of dataset type cache 

1486 butler.registry.refresh() 

1487 

1488 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1489 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1490 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1491 

1492 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1493 self.assertNotEqual(type(metric_model), type(metric)) 

1494 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1495 

1496 # Put the model and read it back to show that everything now 

1497 # works as normal. 

1498 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1499 metric_model_new = butler.get(metric_ref) 

1500 self.assertEqual(metric_model_new, metric_model) 

1501 

1502 # Hack the storage class again to something that will make the 

1503 # get fail because there is no conversion class. 

1504 manager._db.update( 

1505 manager._static.dataset_type, 

1506 {"name": datasetTypeName}, 

1507 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1508 ) 

1509 butler.registry.refresh() 

1510 

1511 with self.assertRaises(ValueError): 

1512 butler.get(datasetTypeName, dataId=dataId) 

1513 

1514 

1515@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1516class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1517 """PosixDatastore specialization of a butler using Postgres""" 

1518 

1519 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1520 fullConfigKey = ".datastore.formatters" 

1521 validationCanFail = True 

1522 datastoreStr = ["/tmp"] 

1523 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1524 registryStr = "PostgreSQL@test" 

1525 

1526 @staticmethod 

1527 def _handler(postgresql): 

1528 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1529 with engine.begin() as connection: 

1530 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1531 

1532 @classmethod 

1533 def setUpClass(cls): 

1534 # Create the postgres test server. 

1535 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1536 cache_initialized_db=True, on_initialized=cls._handler 

1537 ) 

1538 super().setUpClass() 

1539 

1540 @classmethod 

1541 def tearDownClass(cls): 

1542 # Clean up any lingering SQLAlchemy engines/connections 

1543 # so they're closed before we shut down the server. 

1544 gc.collect() 

1545 cls.postgresql.clear_cache() 

1546 super().tearDownClass() 

1547 

1548 def setUp(self): 

1549 self.server = self.postgresql() 

1550 

1551 # Need to add a registry section to the config. 

1552 self._temp_config = False 

1553 config = Config(self.configFile) 

1554 config["registry", "db"] = self.server.url() 

1555 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1556 config.dump(fh) 

1557 self.configFile = fh.name 

1558 self._temp_config = True 

1559 super().setUp() 

1560 

1561 def tearDown(self): 

1562 self.server.stop() 

1563 if self._temp_config and os.path.exists(self.configFile): 

1564 os.remove(self.configFile) 

1565 super().tearDown() 

1566 

1567 def testMakeRepo(self): 

1568 # The base class test assumes that it is using SQLite and that 

1569 # the config file is acceptable to SQLite. 

1570 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1571 
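# Illustrative sketch of wiring a butler config to a throwaway
# PostgreSQL server, as setUp() above does via the class-level factory
# (testing.postgresql.Postgresql is the non-cached equivalent;
# "configFile" is the path this class already uses):
#
#     with testing.postgresql.Postgresql() as server:
#         config = Config(configFile)
#         config["registry", "db"] = server.url()
#         # ... dump the config and build a Butler against it ...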

1572 

1573class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1574 """InMemoryDatastore specialization of a butler""" 

1575 

1576 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1577 fullConfigKey = None 

1578 useTempRoot = False 

1579 validationCanFail = False 

1580 datastoreStr = ["datastore='InMemory"] 

1581 datastoreName = ["InMemoryDatastore@"] 

1582 registryStr = "/gen3.sqlite3" 

1583 

1584 def testIngest(self): 
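# The in-memory datastore has no file artifacts to ingest, so the
# inherited ingest test is intentionally reduced to a no-op here.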

1585 pass 

1586 

1587 

1588class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1589 """PosixDatastore specialization""" 

1590 

1591 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1592 fullConfigKey = ".datastore.datastores.1.formatters" 

1593 validationCanFail = True 

1594 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1595 datastoreName = [ 

1596 "InMemoryDatastore@", 

1597 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1598 "SecondDatastore", 

1599 ] 

1600 registryStr = "/gen3.sqlite3" 

1601 

1602 

1603class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1604 """Test that a yaml file in one location can refer to a root in another.""" 

1605 

1606 datastoreStr = ["dir1"] 

1607 # Disable the makeRepo test since we are deliberately not using 

1608 # butler.yaml as the config name. 

1609 fullConfigKey = None 

1610 

1611 def setUp(self): 

1612 self.root = makeTestTempDir(TESTDIR) 

1613 

1614 # Make a new repository in one place 

1615 self.dir1 = os.path.join(self.root, "dir1") 

1616 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1617 

1618 # Move the yaml file to a different place and add a "root" 

1619 self.dir2 = os.path.join(self.root, "dir2") 

1620 os.makedirs(self.dir2, exist_ok=True) 

1621 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1622 config = Config(configFile1) 

1623 config["root"] = self.dir1 

1624 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1625 config.dumpToUri(configFile2) 

1626 os.remove(configFile1) 

1627 self.tmpConfigFile = configFile2 

1628 

1629 def testFileLocations(self): 

1630 self.assertNotEqual(self.dir1, self.dir2) 

1631 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1632 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1633 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1634 

1635 

1636class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1637 """Test that a config file created by makeRepo outside of repo works.""" 

1638 

1639 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1640 

1641 def setUp(self): 

1642 self.root = makeTestTempDir(TESTDIR) 

1643 self.root2 = makeTestTempDir(TESTDIR) 

1644 

1645 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1646 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1647 

1648 def tearDown(self): 

1649 if os.path.exists(self.root2): 

1650 shutil.rmtree(self.root2, ignore_errors=True) 

1651 super().tearDown() 

1652 

1653 def testConfigExistence(self): 

1654 c = Config(self.tmpConfigFile) 

1655 uri_config = ResourcePath(c["root"]) 

1656 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1657 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1658 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1659 

1660 def testPutGet(self): 

1661 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1662 self.runPutGetTest(storageClass, "test_metric") 

1663 

1664 

1665class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1666 """Test that a config file created by makeRepo outside of repo works.""" 

1667 

1668 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1669 

1670 def setUp(self): 

1671 self.root = makeTestTempDir(TESTDIR) 

1672 self.root2 = makeTestTempDir(TESTDIR) 

1673 

1674 self.tmpConfigFile = self.root2 

1675 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1676 

1677 def testConfigExistence(self): 

1678 # Append the yaml file, else the Config constructor does not know the 

1679 # file type. 

1680 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1681 super().testConfigExistence() 

1682 

1683 

1684class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1685 """Test that a config file created by makeRepo outside of repo works.""" 

1686 

1687 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1688 

1689 def setUp(self): 

1690 self.root = makeTestTempDir(TESTDIR) 

1691 self.root2 = makeTestTempDir(TESTDIR) 

1692 

1693 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1694 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1695 

1696 

1697@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1698class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1699 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1700 a local SQLite-backed SqlRegistry. 

1701 """ 

1702 

1703 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1704 fullConfigKey = None 

1705 validationCanFail = True 

1706 

1707 bucketName = "anybucketname" 

1708 """Name of the Bucket that will be used in the tests. The name is read from 

1709 the config file used with the tests during set-up. 

1710 """ 

1711 

1712 root = "butlerRoot/" 

1713 """Root repository directory expected to be used in case useTempRoot=False. 

1714 Otherwise the root is set to a randomly generated 20-character string 

1715 during set-up. 

1716 """ 

1717 

1718 datastoreStr = [f"datastore={root}"] 

1719 """Contains all expected root locations in a format expected to be 

1720 returned by Butler stringification. 

1721 """ 

1722 

1723 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1724 """The expected format of the S3 Datastore string.""" 

1725 

1726 registryStr = "/gen3.sqlite3" 

1727 """Expected format of the Registry string.""" 

1728 

1729 mock_s3 = mock_s3() 

1730 """The mocked s3 interface from moto.""" 

1731 

1732 def genRoot(self): 

1733 """Returns a random string of len 20 to serve as a root 

1734 name for the temporary bucket repo. 

1735 

1736 This is equivalent to tempfile.mkdtemp as this is what self.root 

1737 becomes when useTempRoot is True. 

1738 """ 

1739 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1740 return rndstr + "/" 

1741 

1742 def setUp(self): 

1743 config = Config(self.configFile) 

1744 uri = ResourcePath(config[".datastore.datastore.root"]) 

1745 self.bucketName = uri.netloc 

1746 

1747 # Enable S3 mocking of tests. 

1748 self.mock_s3.start() 

1749 

1750 # set up some fake credentials if they do not exist 

1751 self.usingDummyCredentials = setAwsEnvCredentials() 

1752 

1753 if self.useTempRoot: 

1754 self.root = self.genRoot() 

1755 rooturi = f"s3://{self.bucketName}/{self.root}" 

1756 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1757 

1758 # need local folder to store registry database 

1759 self.reg_dir = makeTestTempDir(TESTDIR) 

1760 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1761 

1762 # Moto needs to know that we expect the bucket to exist 

1763 # (its name used to be the class attribute bucketName). 

1764 s3 = boto3.resource("s3") 

1765 s3.create_bucket(Bucket=self.bucketName) 

1766 

1767 self.datastoreStr = f"datastore={self.root}" 

1768 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1769 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1770 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1771 

1772 def tearDown(self): 

1773 s3 = boto3.resource("s3") 

1774 bucket = s3.Bucket(self.bucketName) 

1775 try: 

1776 bucket.objects.all().delete() 

1777 except botocore.exceptions.ClientError as e: 

1778 if e.response["Error"]["Code"] == "404": 

1779 # the key was not reachable - pass 

1780 pass 

1781 else: 

1782 raise 

1783 

1784 bucket = s3.Bucket(self.bucketName) 

1785 bucket.delete() 

1786 

1787 # Stop the S3 mock. 

1788 self.mock_s3.stop() 

1789 

1790 # unset any potentially set dummy credentials 

1791 if self.usingDummyCredentials: 

1792 unsetAwsEnvCredentials() 

1793 

1794 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1795 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1796 

1797 if self.useTempRoot and os.path.exists(self.root): 

1798 shutil.rmtree(self.root, ignore_errors=True) 

1799 

1800 super().tearDown() 

1801 
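# Illustrative sketch of the moto pattern used above: inside the mock no
# real AWS calls are made, and the bucket must be created before
# Butler.makeRepo touches it ("config" is a hypothetical butler Config):
#
#     with mock_s3():
#         boto3.resource("s3").create_bucket(Bucket="anybucketname")
#         Butler.makeRepo("s3://anybucketname/butlerRoot/", config=config,
#                         forceConfigRoot=False)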

1802 

1803@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!") 

1804class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1805 """WebdavDatastore specialization of a butler; a Webdav storage Datastore + 

1806 a local SQLite-backed SqlRegistry. 

1807 """ 

1808 

1809 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml") 

1810 fullConfigKey = None 

1811 validationCanFail = True 

1812 

1813 serverName = "localhost" 

1814 """Name of the server that will be used in the tests. 

1815 """ 

1816 

1817 portNumber = 8080 

1818 """Port on which the webdav server listens. Automatically chosen 

1819 in setUpClass() via the _getfreeport() method. 

1820 """ 

1821 

1822 root = "butlerRoot/" 

1823 """Root repository directory expected to be used in case useTempRoot=False. 

1824 Otherwise the root is set to a randomly generated 20-character string 

1825 during set-up. 

1826 """ 

1827 

1828 datastoreStr = [f"datastore={root}"] 

1829 """Contains all expected root locations in a format expected to be 

1830 returned by Butler stringification. 

1831 """ 

1832 

1833 datastoreName = ["FileDatastore@https://{serverName}/{root}"] 

1834 """The expected format of the WebdavDatastore string.""" 

1835 

1836 registryStr = "/gen3.sqlite3" 

1837 """Expected format of the Registry string.""" 

1838 

1839 serverThread = None 

1840 """Thread in which the local webdav server will run""" 

1841 

1842 stopWebdavServer = False 

1843 """This flag will cause the webdav server to 

1844 gracefully shut down when True 

1845 """ 

1846 

1847 def genRoot(self): 

1848 """Returns a random string of len 20 to serve as a root 

1849 name for the temporary bucket repo. 

1850 

1851 This is equivalent to tempfile.mkdtemp as this is what self.root 

1852 becomes when useTempRoot is True. 

1853 """ 

1854 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1855 return rndstr + "/" 

1856 

1857 @classmethod 

1858 def setUpClass(cls): 

1859 # Do the same as inherited class 

1860 cls.storageClassFactory = StorageClassFactory() 

1861 cls.storageClassFactory.addFromConfig(cls.configFile) 

1862 

1863 cls.portNumber = cls._getfreeport() 

1864 # Run a local webdav server on which tests will be run 

1865 cls.serverThread = Thread( 

1866 target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True 

1867 ) 

1868 cls.serverThread.start() 

1869 # Wait for it to start 

1870 time.sleep(3) 

1871 

1872 @classmethod 

1873 def tearDownClass(cls): 

1874 # Ask for graceful shut down of the webdav server 

1875 cls.stopWebdavServer = True 

1876 # Wait for the thread to exit 

1877 cls.serverThread.join() 

1878 super().tearDownClass() 

1879 

1880 def setUp(self): 

1881 config = Config(self.configFile) 

1882 

1883 if self.useTempRoot: 

1884 self.root = self.genRoot() 

1885 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}" 

1886 config.update({"datastore": {"datastore": {"root": self.rooturi}}}) 

1887 

1888 # need local folder to store registry database 

1889 self.reg_dir = makeTestTempDir(TESTDIR) 

1890 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1891 

1892 self.datastoreStr = f"datastore={self.root}" 

1893 self.datastoreName = [f"FileDatastore@{self.rooturi}"] 

1894 

1895 if not _is_webdav_endpoint(self.rooturi): 

1896 raise OSError("Webdav server not running properly: cannot run tests.") 

1897 

1898 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False) 

1899 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml") 

1900 

1901 def tearDown(self): 

1902 # Clear temporary directory 

1903 ResourcePath(self.rooturi).remove() 

1904 ResourcePath(self.rooturi).session.close() 

1905 

1906 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1907 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1908 

1909 if self.useTempRoot and os.path.exists(self.root): 

1910 shutil.rmtree(self.root, ignore_errors=True) 

1911 

1912 super().tearDown() 

1913 

1914 def _serveWebdav(self, port: int, stopWebdavServer): 

1915 """Starts a local webdav-compatible HTTP server, 

1916 Listening on http://localhost:port 

1917 This server only runs when this test class is instantiated, 

1918 and then shuts down. Must be started is a separate thread. 

1919 

1920 Parameters 

1921 ---------- 

1922 port : `int` 

1923 The port number on which the server should listen. 

1924 """ 

1925 root_path = gettempdir() 

1926 

1927 config = { 

1928 "host": "0.0.0.0", 

1929 "port": port, 

1930 "provider_mapping": {"/": root_path}, 

1931 "http_authenticator": {"domain_controller": None}, 

1932 "simple_dc": {"user_mapping": {"*": True}}, 

1933 "verbose": 0, 

1934 } 

1935 app = WsgiDAVApp(config) 

1936 

1937 server_args = { 

1938 "bind_addr": (config["host"], config["port"]), 

1939 "wsgi_app": app, 

1940 } 

1941 server = wsgi.Server(**server_args) 

1942 server.prepare() 

1943 

1944 try: 

1945 # Start the actual server in a separate thread 

1946 t = Thread(target=server.serve, daemon=True) 

1947 t.start() 

1948 # watch stopWebdavServer, and gracefully 

1949 # shut down the server when True 

1950 while True: 

1951 if stopWebdavServer(): 

1952 break 

1953 time.sleep(1) 

1954 except KeyboardInterrupt: 

1955 print("Caught Ctrl-C, shutting down...") 

1956 finally: 

1957 server.stop() 

1958 t.join() 

1959 

1960 def _getfreeport(): 

1961 """ 

1962 Determines a free port using sockets. 

1963 """ 

1964 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

1965 free_socket.bind(("127.0.0.1", 0)) 

1966 free_socket.listen() 

1967 port = free_socket.getsockname()[1] 

1968 free_socket.close() 

1969 return port 

1970 
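# Note on _getfreeport(): closing the probe socket before the webdav
# server rebinds leaves a small window in which another process could
# claim the port. A tighter (hypothetical) variant keeps the socket
# bound until handoff, or binds the server itself to port 0:
#
#     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
#     sock.bind(("127.0.0.1", 0))
#     port = sock.getsockname()[1]  # hold "sock" open until the server starts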

1971 

1972class PosixDatastoreTransfers(unittest.TestCase): 

1973 """Test data transfers between butlers. 

1974 

1975 Tests cover different managers. UUID to UUID and integer to integer are 

1976 tested. UUID to integer is not supported since we do not currently 

1977 want to allow that. Integer to UUID is supported, with the caveat 

1978 that UUID4 values will be generated, which is incorrect for raw 

1979 dataset types; the test ignores that. 

1980 """ 

1981 

1982 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1983 

1984 @classmethod 

1985 def setUpClass(cls): 

1986 cls.storageClassFactory = StorageClassFactory() 

1987 cls.storageClassFactory.addFromConfig(cls.configFile) 

1988 

1989 def setUp(self): 

1990 self.root = makeTestTempDir(TESTDIR) 

1991 self.config = Config(self.configFile) 

1992 

1993 def tearDown(self): 

1994 removeTestTempDir(self.root) 

1995 

1996 def create_butler(self, manager, label): 

1997 config = Config(self.configFile) 

1998 config["registry", "managers", "datasets"] = manager 

1999 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

2000 

2001 def create_butlers(self, manager1, manager2): 

2002 self.source_butler = self.create_butler(manager1, "1") 

2003 self.target_butler = self.create_butler(manager2, "2") 

2004 

2005 def testTransferUuidToUuid(self): 

2006 self.create_butlers( 

2007 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2008 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2009 ) 

2010 # Setting id_gen_map should have no effect here 

2011 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

2012 

2013 def testTransferIntToInt(self): 

2014 with self.assertWarns(FutureWarning): 

2015 self.create_butlers( 

2016 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2017 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2018 ) 

2019 # int dataset ID only allows UNIQUE 

2020 self.assertButlerTransfers() 

2021 

2022 def testTransferIntToUuid(self): 

2023 with self.assertWarns(FutureWarning): 

2024 self.create_butlers( 

2025 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2026 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2027 ) 

2028 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

2029 

2030 def testTransferMissing(self): 

2031 """Test transfers where datastore records are missing. 

2032 

2033 This is how execution butler works. 

2034 """ 

2035 self.create_butlers( 

2036 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2037 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2038 ) 

2039 

2040 # Configure the source butler to allow trust. 

2041 self.source_butler.datastore.trustGetRequest = True 

2042 

2043 self.assertButlerTransfers(purge=True) 

2044 

2045 def testTransferMissingDisassembly(self): 

2046 """Test transfers where datastore records are missing. 

2047 

2048 This is how execution butler works. 

2049 """ 

2050 self.create_butlers( 

2051 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2052 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2053 ) 

2054 

2055 # Configure the source butler to allow trust. 

2056 self.source_butler.datastore.trustGetRequest = True 

2057 

2058 # Test disassembly. 

2059 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2060 

2061 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

2062 """Test that a run can be transferred to another butler.""" 

2063 

2064 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2065 datasetTypeName = "random_data" 

2066 

2067 # Test will create 3 collections and we will want to transfer 

2068 # two of those three. 

2069 runs = ["run1", "run2", "other"] 

2070 

2071 # Also want to use two different dataset types to ensure that 

2072 # grouping works. 

2073 datasetTypeNames = ["random_data", "random_data_2"] 

2074 

2075 # Create the run collections in the source butler. 

2076 for run in runs: 

2077 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2078 

2079 # Create dimensions in both butlers (transfer will not create them). 

2080 n_exposures = 30 

2081 for butler in (self.source_butler, self.target_butler): 

2082 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2083 butler.registry.insertDimensionData( 

2084 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2085 ) 

2086 butler.registry.insertDimensionData( 

2087 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2088 ) 

2089 

2090 for i in range(n_exposures): 

2091 butler.registry.insertDimensionData( 

2092 "exposure", 

2093 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2094 ) 

2095 

2096 # Create dataset types in the source butler. 

2097 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"]) 

2098 for datasetTypeName in datasetTypeNames: 

2099 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2100 self.source_butler.registry.registerDatasetType(datasetType) 

2101 

2102 # Write a dataset to an unrelated run -- this will ensure that 

2103 # we are rewriting integer dataset ids in the target if necessary. 

2104 # Will not be relevant for UUID. 

2105 run = "distraction" 

2106 butler = Butler(butler=self.source_butler, run=run) 

2107 butler.put( 

2108 makeExampleMetrics(), 

2109 datasetTypeName, 

2110 exposure=1, 

2111 instrument="DummyCamComp", 

2112 physical_filter="d-r", 

2113 ) 

2114 

2115 # Write some example metrics to the source 

2116 butler = Butler(butler=self.source_butler) 

2117 

2118 # Set of DatasetRefs that should be in the list of refs to transfer 

2119 # but which will not be transferred. 

2120 deleted = set() 

2121 

2122 n_expected = 20 # Number of datasets expected to be transferred 

2123 source_refs = [] 

2124 for i in range(n_exposures): 

2125 # Put a third of the datasets into each collection; only retain 

2126 # two thirds. 

2127 index = i % 3 

2128 run = runs[index] 

2129 datasetTypeName = datasetTypeNames[i % 2] 

2130 

2131 metric_data = { 

2132 "summary": {"counter": i}, 

2133 "output": {"text": "metric"}, 

2134 "data": [2 * x for x in range(i)], 

2135 } 

2136 metric = MetricsExample(**metric_data) 

2137 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2138 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2139 

2140 # Remove the datastore record using low-level API 

2141 if purge: 

2142 # Remove records for a fraction. 

2143 if index == 1: 

2144 # For one of these delete the file as well. 

2145 # This allows the "missing" code to filter the 

2146 # file out. 

2147 if not deleted: 

2148 primary, uris = butler.datastore.getURIs(ref) 

2149 if primary: 

2150 primary.remove() 

2151 for uri in uris.values(): 

2152 uri.remove() 

2153 n_expected -= 1 

2154 deleted.add(ref) 

2155 

2156 # Remove the datastore record. 

2157 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

2158 

2159 if index < 2: 

2160 source_refs.append(ref) 

2161 if ref not in deleted: 

2162 new_metric = butler.get(ref.unresolved(), collections=run) 

2163 self.assertEqual(new_metric, metric) 

2164 

2165 # Create some bad dataset types to ensure we check for inconsistent 

2166 # definitions. 

2167 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2168 for datasetTypeName in datasetTypeNames: 

2169 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2170 self.target_butler.registry.registerDatasetType(datasetType) 

2171 with self.assertRaises(ConflictingDefinitionError): 

2172 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2173 # And remove the bad definitions. 

2174 for datasetTypeName in datasetTypeNames: 

2175 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2176 

2177 # Transfer without creating dataset types should fail. 

2178 with self.assertRaises(KeyError): 

2179 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2180 

2181 # Now transfer them to the second butler 

2182 with self.assertLogs(level=logging.DEBUG) as cm: 

2183 transferred = self.target_butler.transfer_from( 

2184 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

2185 ) 

2186 self.assertEqual(len(transferred), n_expected) 

2187 log_output = ";".join(cm.output) 

2188 self.assertIn("found in datastore for chunk", log_output) 

2189 self.assertIn("Creating output run", log_output) 

2190 

2191 # Do the transfer twice to ensure that it will do nothing extra. 

2192 # Only do this if purge=True because it does not work for int 

2193 # dataset_id. 

2194 if purge: 

2195 # This should not need to register dataset types. 

2196 transferred = self.target_butler.transfer_from( 

2197 self.source_butler, source_refs, id_gen_map=id_gen_map 

2198 ) 

2199 self.assertEqual(len(transferred), n_expected) 

2200 

2201 # Also do an explicit low-level transfer to trigger some 

2202 # edge cases. 

2203 with self.assertLogs(level=logging.DEBUG) as cm: 

2204 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2205 log_output = ";".join(cm.output) 

2206 self.assertIn("no file artifacts exist", log_output) 

2207 

2208 with self.assertRaises(TypeError): 

2209 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2210 

2211 with self.assertRaises(ValueError): 

2212 self.target_butler.datastore.transfer_from( 

2213 self.source_butler.datastore, source_refs, transfer="split" 

2214 ) 

2215 

2216 # Now try to get the same refs from the new butler. 

2217 for ref in source_refs: 

2218 if ref not in deleted: 

2219 unresolved_ref = ref.unresolved() 

2220 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2221 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2222 self.assertEqual(new_metric, old_metric) 

2223 

2224 # Now prune the run2 collection and instead create a CHAINED collection. 

2225 # This should block the transfer. 

2226 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

2227 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2228 with self.assertRaises(CollectionTypeError): 

2229 # Re-importing the run1 datasets can be problematic if they 

2230 # use integer IDs so filter those out. 

2231 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2232 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 

2233 

2234 
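# Illustrative sketch of the high-level transfer exercised by this class
# ("source_butler"/"target_butler" as above; "refs" is any iterable of
# DatasetRef resolved against the source repo):
#
#     refs = source_butler.registry.queryDatasets(..., collections=...)
#     transferred = target_butler.transfer_from(
#         source_butler, refs, register_dataset_types=True
#     )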

2235if __name__ == "__main__": 2235 ↛ 2236 (line 2235 didn't jump to line 2236 because the condition was never true)

2236 unittest.main()