Coverage for tests/test_butler.py: 14% (1211 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

import gc
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import socket
import string
import tempfile
import time
import unittest
from tempfile import gettempdir
from threading import Thread

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 cannot be imported."""
        return cls


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql
except ImportError:
    testing = None


try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionSearch,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import _is_webdav_endpoint
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


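# Build the small MetricsExample payload (a summary mapping, a nested output
# mapping, and a list of data values) that the tests below repeatedly put
# and read back.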
def makeExampleMetrics():
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, used to prevent the
    misdiagnosis that might otherwise occur when a standard exception is
    raised.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None
    default_run = "ingésτ😺"

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

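    # runPutGetTest drives the full put/get lifecycle: it puts the same
    # dataset through several calling conventions, reads it back through
    # every get variant, retrieves the underlying artifacts, and finally
    # prunes everything it created.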
    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
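        # Three equivalent ways of identifying the dataset for put(): an
        # explicit DatasetRef, a dataset type name plus data ID, and a
        # DatasetType instance plus data ID.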
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # when path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

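        # For composite storage classes the butler can return individual
        # components (e.g. "summary", "data") as their own dataset types,
        # and derived components (e.g. "counter") computed from them on
        # read.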
        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, CollectionSearch.fromExpression(["other"]))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

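        # The repository index (pointed to by DAF_BUTLER_REPOSITORY_INDEX) is
        # a YAML or JSON file mapping repository labels to butler config
        # URIs, allowing Butler("label") to locate a repository by name.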
        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

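        # getURIs can also predict where a dataset that has not been written
        # yet would land; the returned URI carries a "predicted" fragment to
        # make that explicit.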
        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

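        # Each FileDataset pairs an existing file with the ref(s) it
        # satisfies; ingesting with transfer="copy" copies the file into the
        # datastore rather than moving or linking it.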
        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile, refs=refs, formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy", record_validation_info=False)

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

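    # pruneCollection has different rules per collection type: RUN
    # collections may only be deleted with purge=True and unstore=True,
    # TAGGED and CHAINED collections must not be purged, and unstore controls
    # whether the datastore artifacts of affected datasets are removed.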
    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains ref1 and ref3; ref2 is shadowed because it has
        # the same data ID and dataset type as ref1.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(Exception):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(Exception):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying can still return them.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
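        # Everything done inside butler.transaction() -- the dimension
        # inserts and the put -- must be rolled back when TransactionTestError
        # propagates out of the block.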
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

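        # The data IDs below identify each exposure indirectly through
        # seq_num and day_obs; the butler must expand them against the
        # exposure dimension records and rewrite them to use the exposure
        # key.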
        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        The testPutTemplates test verifies the actual physical existence of
        the files in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

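        # A trailing ":?" in a FileTemplate field marks it as optional: an
        # unknown or missing record attribute is logged and rendered as an
        # empty string instead of raising, as the assertions below show.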
        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Export to a temp directory and import back into a new
        temp-directory repo. Does not assume a POSIX datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements
                # even though there aren't any in these datasets or in the
                # database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testPathConstructor(self):
        """Independent test of constructor using PathLike."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object with the butler yaml
        path = pathlib.Path(self.tmpConfigFile)
        butler = Butler(path, writeable=False)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object without the butler yaml
        # (making sure we skip it if the tmp config doesn't end
        # in butler.yaml -- which is the case for a subclass)
        if self.tmpConfigFile.endswith("butler.yaml"):
            path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
            butler = Butler(path, writeable=False)
            self.assertIsInstance(butler, Butler)

1330 def testExportTransferCopy(self): 

1331 """Test local export using all transfer modes""" 

1332 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1333 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1334 # Test that the repo actually has at least one dataset. 

1335 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1336 self.assertGreater(len(datasets), 0) 

1337 uris = [exportButler.getURI(d) for d in datasets] 

1338 datastoreRoot = exportButler.datastore.root 

1339 

1340 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1341 

1342 for path in pathsInStore: 

1343 # Assume local file system 

1344 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1345 

1346 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1347 with safeTestTempDir(TESTDIR) as exportDir: 

1348 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1349 export.saveDatasets(datasets) 

1350 for path in pathsInStore: 

1351 self.assertTrue( 

1352 self.checkFileExists(exportDir, path), 

1353 f"Check that mode {transfer} exported files", 

1354 ) 

1355 
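    def _example_yaml_export(self, butler, export_dir, refs):

        # Minimal sketch of the export context manager used above

        # (hypothetical helper): the dataset files are transferred into

        # export_dir and described by a YAML index file that

        # Butler.import_ can consume later.

        with butler.export(directory=export_dir, format="yaml", transfer="copy") as export:

            export.saveDatasets(refs)
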

1356 def testPruneDatasets(self): 

1357 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1358 butler = Butler(self.tmpConfigFile, writeable=True) 

1359 # Load registry data with dimensions to hang datasets off of. 

1360 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1361 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1362 # Add some RUN-type collections. 

1363 run1 = "run1" 

1364 butler.registry.registerRun(run1) 

1365 run2 = "run2" 

1366 butler.registry.registerRun(run2) 

1367 # Put some datasets. ref1 and ref2 have the same data ID, and are in

1368 # different runs. ref3 has a different data ID. 

1369 metric = makeExampleMetrics() 

1370 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1371 datasetType = self.addDatasetType( 

1372 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1373 ) 

1374 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1375 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1376 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1377 

1378 # Simple prune. 

1379 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1380 with self.assertRaises(LookupError): 

1381 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1382 

1383 # Put data back. 

1384 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1385 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1386 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1387 

1388 # Check that in normal mode, deleting the record means that

1389 # trashing will not touch the file.

1390 uri1 = butler.datastore.getURI(ref1) 

1391 butler.datastore.bridge.moveToTrash([ref1]) # Update the dataset_location table 

1392 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1393 butler.datastore.trash(ref1) 

1394 butler.datastore.emptyTrash() 

1395 self.assertTrue(uri1.exists()) 

1396 uri1.remove() # Clean it up. 

1397 

1398 # Simulate execution butler setup by deleting the datastore 

1399 # record but keeping the file around and trusting. 

1400 butler.datastore.trustGetRequest = True 

1401 uri2 = butler.datastore.getURI(ref2) 

1402 uri3 = butler.datastore.getURI(ref3) 

1403 self.assertTrue(uri2.exists()) 

1404 self.assertTrue(uri3.exists()) 

1405 

1406 # Remove the datastore record. 

1407 butler.datastore.bridge.moveToTrash([ref2]) # Update the dataset_location table 

1408 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1409 self.assertTrue(uri2.exists()) 

1410 butler.datastore.trash([ref2, ref3]) 

1411 # Immediate removal for ref2 file 

1412 self.assertFalse(uri2.exists()) 

1413 # But ref3 has to wait for the empty. 

1414 self.assertTrue(uri3.exists()) 

1415 butler.datastore.emptyTrash() 

1416 self.assertFalse(uri3.exists()) 

1417 

1418 # Clear out the datasets from registry. 

1419 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1420 
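    def _example_two_phase_delete(self, butler, ref):

        # Hypothetical helper: file datastore deletion is two-phase, as

        # exercised above. trash() only marks the dataset; the artifact

        # is removed when emptyTrash() runs, so an interruption between

        # the two calls does not strand the registry.

        butler.datastore.trash(ref)

        butler.datastore.emptyTrash()
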

1421 def testPytypePutCoercion(self): 

1422 """Test python type coercion on Butler.get and put.""" 

1423 

1424 # Store some data with the normal example storage class. 

1425 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1426 datasetTypeName = "test_metric" 

1427 butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName) 

1428 

1429 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1430 

1431 # Put a dict and this should coerce to a MetricsExample 

1432 test_dict = {"summary": {"a": 1}, "output": {"b": 2}} 

1433 metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424) 

1434 test_metric = butler.getDirect(metric_ref) 

1435 self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample") 

1436 self.assertEqual(test_metric.summary, test_dict["summary"]) 

1437 self.assertEqual(test_metric.output, test_dict["output"]) 

1438 

1439 # Check that the put still works if a DatasetType is given with 

1440 # a definition matching this python type. 

1441 registry_type = butler.registry.getDatasetType(datasetTypeName) 

1442 this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson") 

1443 metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425) 

1444 self.assertEqual(metric2_ref.datasetType, registry_type) 

1445 

1446 # The get will return the type expected by registry. 

1447 test_metric2 = butler.getDirect(metric2_ref) 

1448 self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample") 

1449 

1450 # Make a new DatasetRef with the compatible but different DatasetType. 

1451 # This should now return a dict. 

1452 new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run) 

1453 test_dict2 = butler.getDirect(new_ref) 

1454 self.assertEqual(get_full_type_name(test_dict2), "dict") 

1455 

1456 # Get it again with the wrong dataset type definition using get() 

1457 # rather than getDirect(). This should be consistent with getDirect() 

1458 # behavior and return the type of the DatasetType. 

1459 test_dict3 = butler.get(this_type, dataId=dataId, visit=425) 

1460 self.assertEqual(get_full_type_name(test_dict3), "dict") 

1461 

1462 def testPytypeCoercion(self): 

1463 """Test python type coercion on Butler.get and put.""" 

1464 

1465 # Store some data with the normal example storage class. 

1466 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1467 datasetTypeName = "test_metric" 

1468 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1469 

1470 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1471 metric = butler.get(datasetTypeName, dataId=dataId) 

1472 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1473 

1474 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1475 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1476 

1477 # Now need to hack the registry dataset type definition. 

1478 # There is no API for this. 

1479 manager = butler.registry._managers.datasets 

1480 manager._db.update( 

1481 manager._static.dataset_type, 

1482 {"name": datasetTypeName}, 

1483 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1484 ) 

1485 

1486 # Force reset of dataset type cache 

1487 butler.registry.refresh() 

1488 

1489 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1490 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1491 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1492 

1493 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1494 self.assertNotEqual(type(metric_model), type(metric)) 

1495 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1496 

1497 # Put the model and read it back to show that everything now 

1498 # works as normal. 

1499 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1500 metric_model_new = butler.get(metric_ref) 

1501 self.assertEqual(metric_model_new, metric_model) 

1502 

1503 # Hack the storage class again to something that will fail on the 

1504 # get with no conversion class. 

1505 manager._db.update( 

1506 manager._static.dataset_type, 

1507 {"name": datasetTypeName}, 

1508 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1509 ) 

1510 butler.registry.refresh() 

1511 

1512 with self.assertRaises(ValueError): 

1513 butler.get(datasetTypeName, dataId=dataId) 

1514 
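    def _example_coerced_get(self, butler, dataset_type_name, data_id):

        # Hypothetical helper summarizing the behavior exercised above:

        # get() coerces to the python type of the registry's current

        # storage class when a converter is available, and raises

        # ValueError when no conversion is registered.

        try:

            return butler.get(dataset_type_name, dataId=data_id)

        except ValueError:

            return None  # incompatible storage classes
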

1515 

1516@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1517class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1518 """PosixDatastore specialization of a butler using Postgres""" 

1519 

1520 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1521 fullConfigKey = ".datastore.formatters" 

1522 validationCanFail = True 

1523 datastoreStr = ["/tmp"] 

1524 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1525 registryStr = "PostgreSQL@test" 

1526 

1527 @staticmethod 

1528 def _handler(postgresql): 

1529 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1530 with engine.begin() as connection: 

1531 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1532 

1533 @classmethod 

1534 def setUpClass(cls): 

1535 # Create the postgres test server. 

1536 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1537 cache_initialized_db=True, on_initialized=cls._handler 

1538 ) 

1539 super().setUpClass() 

1540 

1541 @classmethod 

1542 def tearDownClass(cls): 

1543 # Clean up any lingering SQLAlchemy engines/connections 

1544 # so they're closed before we shut down the server. 

1545 gc.collect() 

1546 cls.postgresql.clear_cache() 

1547 super().tearDownClass() 

1548 

1549 def setUp(self): 

1550 self.server = self.postgresql() 

1551 

1552 # Need to add a registry section to the config. 

1553 self._temp_config = False 

1554 config = Config(self.configFile) 

1555 config["registry", "db"] = self.server.url() 

1556 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1557 config.dump(fh) 

1558 self.configFile = fh.name 

1559 self._temp_config = True 

1560 super().setUp() 

1561 

1562 def tearDown(self): 

1563 self.server.stop() 

1564 if self._temp_config and os.path.exists(self.configFile): 

1565 os.remove(self.configFile) 

1566 super().tearDown() 

1567 

1568 def testMakeRepo(self): 

1569 # The base class test assumes that it is using SQLite and that

1570 # the config file is acceptable to SQLite.

1571 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1572 
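    def _example_postgres_registry_config(self):

        # Sketch with hypothetical values: a butler targets Postgres

        # purely through the registry.db URL in its config, exactly as

        # setUp does above with the testing.postgresql server's URL.

        config = Config(self.configFile)

        config["registry", "db"] = "postgresql://user@localhost:5432/butler_test"

        return config
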

1573 

1574class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1575 """InMemoryDatastore specialization of a butler""" 

1576 

1577 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1578 fullConfigKey = None 

1579 useTempRoot = False 

1580 validationCanFail = False 

1581 datastoreStr = ["datastore='InMemory"] 

1582 datastoreName = ["InMemoryDatastore@"] 

1583 registryStr = "/gen3.sqlite3" 

1584 

1585 def testIngest(self): 

1586 pass 

1587 

1588 

1589class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1590 """PosixDatastore specialization""" 

1591 

1592 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1593 fullConfigKey = ".datastore.datastores.1.formatters" 

1594 validationCanFail = True 

1595 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1596 datastoreName = [ 

1597 "InMemoryDatastore@", 

1598 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1599 "SecondDatastore", 

1600 ] 

1601 registryStr = "/gen3.sqlite3" 

1602 

1603 

1604class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1605 """Test that a yaml file in one location can refer to a root in another.""" 

1606 

1607 datastoreStr = ["dir1"] 

1608 # Disable the makeRepo test since we are deliberately not using 

1609 # butler.yaml as the config name. 

1610 fullConfigKey = None 

1611 

1612 def setUp(self): 

1613 self.root = makeTestTempDir(TESTDIR) 

1614 

1615 # Make a new repository in one place 

1616 self.dir1 = os.path.join(self.root, "dir1") 

1617 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1618 

1619 # Move the yaml file to a different place and add a "root" 

1620 self.dir2 = os.path.join(self.root, "dir2") 

1621 os.makedirs(self.dir2, exist_ok=True) 

1622 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1623 config = Config(configFile1) 

1624 config["root"] = self.dir1 

1625 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1626 config.dumpToUri(configFile2) 

1627 os.remove(configFile1) 

1628 self.tmpConfigFile = configFile2 

1629 

1630 def testFileLocations(self): 

1631 self.assertNotEqual(self.dir1, self.dir2) 

1632 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1633 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1634 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1635 

1636 

1637class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1638 """Test that a config file created by makeRepo outside of repo works.""" 

1639 

1640 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1641 

1642 def setUp(self): 

1643 self.root = makeTestTempDir(TESTDIR) 

1644 self.root2 = makeTestTempDir(TESTDIR) 

1645 

1646 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1647 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1648 

1649 def tearDown(self): 

1650 if os.path.exists(self.root2): 

1651 shutil.rmtree(self.root2, ignore_errors=True) 

1652 super().tearDown() 

1653 

1654 def testConfigExistence(self): 

1655 c = Config(self.tmpConfigFile) 

1656 uri_config = ResourcePath(c["root"]) 

1657 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1658 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1659 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1660 

1661 def testPutGet(self): 

1662 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1663 self.runPutGetTest(storageClass, "test_metric") 

1664 

1665 

1666class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1667 """Test that a config file created by makeRepo outside of repo works.""" 

1668 

1669 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1670 

1671 def setUp(self): 

1672 self.root = makeTestTempDir(TESTDIR) 

1673 self.root2 = makeTestTempDir(TESTDIR) 

1674 

1675 self.tmpConfigFile = self.root2 

1676 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1677 

1678 def testConfigExistence(self): 

1679 # Append the yaml file, else the Config constructor does not know

1680 # the file type.

1681 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1682 super().testConfigExistence() 

1683 

1684 

1685class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1686 """Test that a config file created by makeRepo outside of repo works.""" 

1687 

1688 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1689 

1690 def setUp(self): 

1691 self.root = makeTestTempDir(TESTDIR) 

1692 self.root2 = makeTestTempDir(TESTDIR) 

1693 

1694 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1695 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1696 

1697 

1698@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1699class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1700 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1701 a local in-memory SqlRegistry. 

1702 """ 

1703 

1704 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1705 fullConfigKey = None 

1706 validationCanFail = True 

1707 

1708 bucketName = "anybucketname" 

1709 """Name of the Bucket that will be used in the tests. The name is read from 

1710 the config file used with the tests during set-up. 

1711 """ 

1712 

1713 root = "butlerRoot/" 

1714 """Root repository directory expected to be used in case useTempRoot=False. 

1715 Otherwise the root is set to a 20 characters long randomly generated string 

1716 during set-up. 

1717 """ 

1718 

1719 datastoreStr = [f"datastore={root}"] 

1720 """Contains all expected root locations in a format expected to be 

1721 returned by Butler stringification. 

1722 """ 

1723 

1724 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1725 """The expected format of the S3 Datastore string.""" 

1726 

1727 registryStr = "/gen3.sqlite3" 

1728 """Expected format of the Registry string.""" 

1729 

1730 mock_s3 = mock_s3() 

1731 """The mocked s3 interface from moto.""" 

1732 

1733 def genRoot(self): 

1734 """Returns a random string of len 20 to serve as a root 

1735 name for the temporary bucket repo. 

1736 

1737 This is equivalent to tempfile.mkdtemp as this is what self.root 

1738 becomes when useTempRoot is True. 

1739 """ 

1740 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1741 return rndstr + "/" 

1742 

1743 def setUp(self): 

1744 config = Config(self.configFile) 

1745 uri = ResourcePath(config[".datastore.datastore.root"]) 

1746 self.bucketName = uri.netloc 

1747 

1748 # Enable S3 mocking of tests. 

1749 self.mock_s3.start() 

1750 

1751 # set up some fake credentials if they do not exist 

1752 self.usingDummyCredentials = setAwsEnvCredentials() 

1753 

1754 if self.useTempRoot: 

1755 self.root = self.genRoot() 

1756 rooturi = f"s3://{self.bucketName}/{self.root}" 

1757 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1758 

1759 # need local folder to store registry database 

1760 self.reg_dir = makeTestTempDir(TESTDIR) 

1761 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1762 

1763 # Moto needs to know that we expect the bucket bucketName to exist

1764 # (this used to be the class attribute bucketName).

1765 s3 = boto3.resource("s3") 

1766 s3.create_bucket(Bucket=self.bucketName) 

1767 

1768 self.datastoreStr = f"datastore={self.root}" 

1769 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1770 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1771 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1772 

1773 def tearDown(self): 

1774 s3 = boto3.resource("s3") 

1775 bucket = s3.Bucket(self.bucketName) 

1776 try: 

1777 bucket.objects.all().delete() 

1778 except botocore.exceptions.ClientError as e: 

1779 if e.response["Error"]["Code"] == "404": 

1780 # the key was not reachable - pass 

1781 pass 

1782 else: 

1783 raise 

1784 

1785 bucket = s3.Bucket(self.bucketName) 

1786 bucket.delete() 

1787 

1788 # Stop the S3 mock. 

1789 self.mock_s3.stop() 

1790 

1791 # unset any potentially set dummy credentials 

1792 if self.usingDummyCredentials: 

1793 unsetAwsEnvCredentials() 

1794 

1795 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1796 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1797 

1798 if self.useTempRoot and os.path.exists(self.root): 

1799 shutil.rmtree(self.root, ignore_errors=True) 

1800 

1801 super().tearDown() 

1802 
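    def _example_mocked_s3_roundtrip(self):

        # Hypothetical helper: while the moto mock started in setUp is

        # active, boto3 talks to an in-process fake S3, so bucket

        # operations like these never touch real AWS.

        s3 = boto3.resource("s3")

        s3.create_bucket(Bucket="example-illustration-bucket")

        s3.Bucket("example-illustration-bucket").delete()
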

1803 

1804@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!") 

1805class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1806 """WebdavDatastore specialization of a butler; a Webdav storage Datastore + 

1807 a local in-memory SqlRegistry. 

1808 """ 

1809 

1810 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml") 

1811 fullConfigKey = None 

1812 validationCanFail = True 

1813 

1814 serverName = "localhost" 

1815 """Name of the server that will be used in the tests. 

1816 """ 

1817 

1818 portNumber = 8080 

1819 """Port on which the webdav server listens. Automatically chosen 

1820 at setUpClass via the _getfreeport() method 

1821 """ 

1822 

1823 root = "butlerRoot/" 

1824 """Root repository directory expected to be used in case useTempRoot=False. 

1825 Otherwise the root is set to a 20 characters long randomly generated string 

1826 during set-up. 

1827 """ 

1828 

1829 datastoreStr = [f"datastore={root}"] 

1830 """Contains all expected root locations in a format expected to be 

1831 returned by Butler stringification. 

1832 """ 

1833 

1834 datastoreName = ["FileDatastore@https://{serverName}/{root}"] 

1835 """The expected format of the WebdavDatastore string.""" 

1836 

1837 registryStr = "/gen3.sqlite3" 

1838 """Expected format of the Registry string.""" 

1839 

1840 serverThread = None 

1841 """Thread in which the local webdav server will run""" 

1842 

1843 stopWebdavServer = False 

1844 """This flag will cause the webdav server to 

1845 gracefully shut down when True 

1846 """ 

1847 

1848 def genRoot(self): 

1849 """Returns a random string of len 20 to serve as a root 

1850 name for the temporary bucket repo. 

1851 

1852 This is equivalent to tempfile.mkdtemp as this is what self.root 

1853 becomes when useTempRoot is True. 

1854 """ 

1855 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1856 return rndstr + "/" 

1857 

1858 @classmethod 

1859 def setUpClass(cls): 

1860 # Do the same as inherited class 

1861 cls.storageClassFactory = StorageClassFactory() 

1862 cls.storageClassFactory.addFromConfig(cls.configFile) 

1863 

1864 cls.portNumber = cls._getfreeport() 

1865 # Run a local webdav server on which tests will be run 

1866 cls.serverThread = Thread( 

1867 target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True 

1868 ) 

1869 cls.serverThread.start() 

1870 # Wait for it to start 

1871 time.sleep(3) 

1872 
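    @classmethod

    def _example_wait_until_ready(cls, timeout=10.0):

        # Hypothetical alternative to the fixed 3-second sleep above:

        # poll the endpoint (using _is_webdav_endpoint, imported at

        # module scope) until the server responds or the timeout expires.

        deadline = time.time() + timeout

        while time.time() < deadline:

            if _is_webdav_endpoint(f"http://{cls.serverName}:{cls.portNumber}/"):

                return True

            time.sleep(0.1)

        return False
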

1873 @classmethod 

1874 def tearDownClass(cls): 

1875 # Ask for graceful shut down of the webdav server 

1876 cls.stopWebdavServer = True 

1877 # Wait for the thread to exit 

1878 cls.serverThread.join() 

1879 super().tearDownClass() 

1880 

1881 def setUp(self): 

1882 config = Config(self.configFile) 

1883 

1884 if self.useTempRoot: 

1885 self.root = self.genRoot() 

1886 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}" 

1887 config.update({"datastore": {"datastore": {"root": self.rooturi}}}) 

1888 

1889 # need local folder to store registry database 

1890 self.reg_dir = makeTestTempDir(TESTDIR) 

1891 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1892 

1893 self.datastoreStr = f"datastore={self.root}" 

1894 self.datastoreName = [f"FileDatastore@{self.rooturi}"] 

1895 

1896 if not _is_webdav_endpoint(self.rooturi): 

1897 raise OSError("Webdav server not running properly: cannot run tests.") 

1898 

1899 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False) 

1900 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml") 

1901 

1902 def tearDown(self): 

1903 # Clear temporary directory 

1904 ResourcePath(self.rooturi).remove() 

1905 ResourcePath(self.rooturi).session.close() 

1906 

1907 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1908 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1909 

1910 if self.useTempRoot and os.path.exists(self.root): 

1911 shutil.rmtree(self.root, ignore_errors=True) 

1912 

1913 super().tearDown() 

1914 

1915 def _serveWebdav(self, port: int, stopWebdavServer): 

1916 """Starts a local webdav-compatible HTTP server, 

1917 Listening on http://localhost:port 

1918 This server only runs when this test class is instantiated, 

1919 and then shuts down. Must be started is a separate thread. 

1920 

1921 Parameters 

1922 ---------- 

1923 port : `int` 

1924 The port number on which the server should listen 

1925 """ 

1926 root_path = gettempdir() 

1927 

1928 config = { 

1929 "host": "0.0.0.0", 

1930 "port": port, 

1931 "provider_mapping": {"/": root_path}, 

1932 "http_authenticator": {"domain_controller": None}, 

1933 "simple_dc": {"user_mapping": {"*": True}}, 

1934 "verbose": 0, 

1935 } 

1936 app = WsgiDAVApp(config) 

1937 

1938 server_args = { 

1939 "bind_addr": (config["host"], config["port"]), 

1940 "wsgi_app": app, 

1941 } 

1942 server = wsgi.Server(**server_args) 

1943 server.prepare() 

1944 

1945 try: 

1946 # Start the actual server in a separate thread 

1947 t = Thread(target=server.serve, daemon=True) 

1948 t.start() 

1949 # watch stopWebdavServer, and gracefully 

1950 # shut down the server when True 

1951 while True: 

1952 if stopWebdavServer(): 

1953 break 

1954 time.sleep(1) 

1955 except KeyboardInterrupt: 

1956 print("Caught Ctrl-C, shutting down...") 

1957 finally: 

1958 server.stop() 

1959 t.join() 

1960 

@staticmethod
1961 def _getfreeport():

1962 """ 

1963 Determines a free port using sockets. 

1964 """ 

1965 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

1966 free_socket.bind(("127.0.0.1", 0)) 

1967 free_socket.listen() 

1968 port = free_socket.getsockname()[1] 

1969 free_socket.close() 

1970 return port 

1971 

1972 

1973class PosixDatastoreTransfers(unittest.TestCase): 

1974 """Test data transfers between butlers. 

1975 

1976 Tests run for different dataset-ID managers: UUID to UUID and integer

1977 to integer. UUID to integer is not supported since we do not currently

1978 want to allow that. Integer to UUID is supported, with the caveat

1979 that a UUID4 will be generated, which is incorrect for raw

1980 dataset types; the test ignores that.

1981 """ 

1982 

1983 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1984 

1985 @classmethod 

1986 def setUpClass(cls): 

1987 cls.storageClassFactory = StorageClassFactory() 

1988 cls.storageClassFactory.addFromConfig(cls.configFile) 

1989 

1990 def setUp(self): 

1991 self.root = makeTestTempDir(TESTDIR) 

1992 self.config = Config(self.configFile) 

1993 

1994 def tearDown(self): 

1995 removeTestTempDir(self.root) 

1996 

1997 def create_butler(self, manager, label): 

1998 config = Config(self.configFile) 

1999 config["registry", "managers", "datasets"] = manager 

2000 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

2001 

2002 def create_butlers(self, manager1, manager2): 

2003 self.source_butler = self.create_butler(manager1, "1") 

2004 self.target_butler = self.create_butler(manager2, "2") 

2005 

2006 def testTransferUuidToUuid(self): 

2007 self.create_butlers( 

2008 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2009 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2010 ) 

2011 # Setting id_gen_map should have no effect here 

2012 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

2013 

2014 def testTransferIntToInt(self): 

2015 with self.assertWarns(FutureWarning): 

2016 self.create_butlers( 

2017 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2018 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2019 ) 

2020 # int dataset ID only allows UNIQUE 

2021 self.assertButlerTransfers() 

2022 

2023 def testTransferIntToUuid(self): 

2024 with self.assertWarns(FutureWarning): 

2025 self.create_butlers( 

2026 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2027 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2028 ) 

2029 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

2030 

2031 def testTransferMissing(self): 

2032 """Test transfers where datastore records are missing. 

2033 

2034 This is how execution butler works. 

2035 """ 

2036 self.create_butlers( 

2037 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2038 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2039 ) 

2040 

2041 # Configure the source butler to allow trust. 

2042 self.source_butler.datastore.trustGetRequest = True 

2043 

2044 self.assertButlerTransfers(purge=True) 

2045 

2046 def testTransferMissingDisassembly(self): 

2047 """Test transfers where datastore records are missing. 

2048 

2049 This is how execution butler works. 

2050 """ 

2051 self.create_butlers( 

2052 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2053 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2054 ) 

2055 

2056 # Configure the source butler to allow trust. 

2057 self.source_butler.datastore.trustGetRequest = True 

2058 

2059 # Test disassembly. 

2060 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2061 

2062 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

2063 """Test that a run can be transferred to another butler.""" 

2064 

2065 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2066 datasetTypeName = "random_data" 

2067 

2068 # The test will create 3 collections, of which we will want to

2069 # transfer two.

2070 runs = ["run1", "run2", "other"] 

2071 

2072 # Also want to use two different dataset types to ensure that 

2073 # grouping works. 

2074 datasetTypeNames = ["random_data", "random_data_2"] 

2075 

2076 # Create the run collections in the source butler. 

2077 for run in runs: 

2078 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2079 

2080 # Create dimensions in both butlers (transfer will not create them). 

2081 n_exposures = 30 

2082 for butler in (self.source_butler, self.target_butler): 

2083 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2084 butler.registry.insertDimensionData( 

2085 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2086 ) 

2087 butler.registry.insertDimensionData( 

2088 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2089 ) 

2090 

2091 for i in range(n_exposures): 

2092 butler.registry.insertDimensionData( 

2093 "exposure", 

2094 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2095 ) 

2096 

2097 # Create dataset types in the source butler. 

2098 dimensions = butler.registry.dimensions.extract(["instrument", "exposure"]) 

2099 for datasetTypeName in datasetTypeNames: 

2100 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2101 self.source_butler.registry.registerDatasetType(datasetType) 

2102 

2103 # Write a dataset to an unrelated run -- this will ensure that

2104 # we are rewriting integer dataset ids in the target if necessary.

2105 # This is not relevant for UUID-based managers.

2106 run = "distraction" 

2107 butler = Butler(butler=self.source_butler, run=run) 

2108 butler.put( 

2109 makeExampleMetrics(), 

2110 datasetTypeName, 

2111 exposure=1, 

2112 instrument="DummyCamComp", 

2113 physical_filter="d-r", 

2114 ) 

2115 

2116 # Write some example metrics to the source 

2117 butler = Butler(butler=self.source_butler) 

2118 

2119 # Set of DatasetRefs that should be in the list of refs to transfer 

2120 # but which will not be transferred. 

2121 deleted = set() 

2122 

2123 n_expected = 20 # Number of datasets expected to be transferred 

2124 source_refs = [] 

2125 for i in range(n_exposures): 

2126 # Put a third of the datasets into each collection; only

2127 # two thirds are retained.

2128 index = i % 3 

2129 run = runs[index] 

2130 datasetTypeName = datasetTypeNames[i % 2] 

2131 

2132 metric_data = { 

2133 "summary": {"counter": i}, 

2134 "output": {"text": "metric"}, 

2135 "data": [2 * x for x in range(i)], 

2136 } 

2137 metric = MetricsExample(**metric_data) 

2138 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2139 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2140 

2141 # Remove the datastore record using low-level API 

2142 if purge: 

2143 # Remove records for a fraction. 

2144 if index == 1: 

2145 

2146 # For one of these delete the file as well. 

2147 # This allows the "missing" code to filter the 

2148 # file out. 

2149 if not deleted: 

2150 primary, uris = butler.datastore.getURIs(ref) 

2151 if primary: 

2152 primary.remove() 

2153 for uri in uris.values(): 

2154 uri.remove() 

2155 n_expected -= 1 

2156 deleted.add(ref) 

2157 

2158 # Remove the datastore record. 

2159 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

2160 

2161 if index < 2: 

2162 source_refs.append(ref) 

2163 if ref not in deleted: 

2164 new_metric = butler.get(ref.unresolved(), collections=run) 

2165 self.assertEqual(new_metric, metric) 

2166 

2167 # Create some bad dataset types to ensure we check for inconsistent 

2168 # definitions. 

2169 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2170 for datasetTypeName in datasetTypeNames: 

2171 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2172 self.target_butler.registry.registerDatasetType(datasetType) 

2173 with self.assertRaises(ConflictingDefinitionError): 

2174 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2175 # And remove the bad definitions. 

2176 for datasetTypeName in datasetTypeNames: 

2177 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2178 

2179 # Transfer without creating dataset types should fail. 

2180 with self.assertRaises(KeyError): 

2181 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2182 

2183 # Now transfer them to the second butler 

2184 with self.assertLogs(level=logging.DEBUG) as cm: 

2185 transferred = self.target_butler.transfer_from( 

2186 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

2187 ) 

2188 self.assertEqual(len(transferred), n_expected) 

2189 log_output = ";".join(cm.output) 

2190 self.assertIn("found in datastore for chunk", log_output) 

2191 self.assertIn("Creating output run", log_output) 

2192 

2193 # Do the transfer twice to ensure that it will do nothing extra. 

2194 # Only do this if purge=True because it does not work for int 

2195 # dataset_id. 

2196 if purge: 

2197 # This should not need to register dataset types. 

2198 transferred = self.target_butler.transfer_from( 

2199 self.source_butler, source_refs, id_gen_map=id_gen_map 

2200 ) 

2201 self.assertEqual(len(transferred), n_expected) 

2202 

2203 # Also do an explicit low-level transfer to trigger some 

2204 # edge cases. 

2205 with self.assertLogs(level=logging.DEBUG) as cm: 

2206 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2207 log_output = ";".join(cm.output) 

2208 self.assertIn("no file artifacts exist", log_output) 

2209 

2210 with self.assertRaises(TypeError): 

2211 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2212 

2213 with self.assertRaises(ValueError): 

2214 self.target_butler.datastore.transfer_from( 

2215 self.source_butler.datastore, source_refs, transfer="split" 

2216 ) 

2217 

2218 # Now try to get the same refs from the new butler. 

2219 for ref in source_refs: 

2220 if ref not in deleted: 

2221 unresolved_ref = ref.unresolved() 

2222 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2223 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2224 self.assertEqual(new_metric, old_metric) 

2225 

2226 # Now prune the run2 collection and instead create a CHAINED collection.

2227 # This should block the transfer. 

2228 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

2229 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2230 with self.assertRaises(CollectionTypeError): 

2231 # Re-importing the run1 datasets can be problematic if they

2232 # use integer IDs, so filter those out.

2233 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2234 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 

2235 
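    def _example_transfer(self, refs):

        # Minimal sketch of a butler-to-butler transfer (hypothetical

        # helper): register_dataset_types=True lets the target create any

        # missing dataset type definitions instead of raising KeyError.

        transferred = self.target_butler.transfer_from(

            self.source_butler, refs, register_dataset_types=True

        )

        self.assertEqual(len(transferred), len(refs))
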

2236 

2237if __name__ == "__main__": 

2238 unittest.main()