Coverage for tests/test_butler.py: 14% (1215 statements)


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

import gc
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import socket
import string
import tempfile
import time
import unittest
import unittest.mock  # imported explicitly since unittest.mock.patch is used below
from tempfile import gettempdir
from threading import Thread

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 cannot be imported."""
        return cls


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql
except ImportError:
    testing = None


try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import _is_webdav_endpoint
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))

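# The example metric below bundles three pieces that the put/get tests read
# back individually: a "summary" mapping, an "output" mapping, and a "data"
# sequence that supports the "slice" read parameter used further below.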

def makeExampleMetrics():
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other
    test cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper methods for running a suite of put/get tests from different
    butler configurations."""

    root = None
    default_run = "ingésτ😺"

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

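    # runPutGetTest exercises the same put/get round trip three times, once
    # for each way a dataset can be identified on put: an explicit
    # DatasetRef, a dataset type name plus data ID, and a DatasetType
    # instance plus data ID.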

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                # Can the artifacts themselves be retrieved?
                if not butler.datastore.isEphemeral:
                    root_uri = ResourcePath(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)

                            # When the path is not preserved there should not
                            # be any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should still fail.
                with self.assertRaises(LookupError):
                    butler.datasetExists(*args, collections=this_run)
                # getDirect() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.getDirect(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know they are
                # empty
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler

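    # A butler constructed without a default run can still accept puts that
    # name a run explicitly; operations that rely on a default collection
    # are expected to raise CollectionError instead.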

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises a
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))

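# Concrete subclasses provide configFile plus expectation attributes such as
# fullConfigKey, validationCanFail, datastoreStr and registryStr (see
# PosixDatastoreButlerTestCase below).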

class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

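    # Ingest is exercised in two ways below: one file per dataset, and a
    # single file shared by two datasets via MultiDetectorFormatter, in
    # which case both refs must resolve to the same artifact URI.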

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        datasets = []
        datasets.append(FileDataset(path=metricFile, refs=refs, formatter=MultiDetectorFormatter))

        butler.ingest(*datasets, transfer="copy", record_validation_info=False)

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

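    # pruneCollection applies different rules per collection type: RUN
    # collections require purge=True together with unstore=True, RUN
    # collections referenced by a CHAINED collection cannot be removed at
    # all, and TAGGED/CHAINED collections reject purge entirely.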

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

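    # Component dataset types (e.g. "metric.summary") are never registered
    # themselves, but queryDatasetTypes(components=True) returns them
    # alongside the registered parent types.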

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying for them can still return
        # the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry = set(butler.registry.queryDatasetTypes(components=True))
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

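    # Everything inside the butler.transaction() block, the dimension
    # inserts as well as the put itself, must roll back when the block
    # raises, leaving neither registry entries nor datastore artifacts.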

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

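    # A data ID given as (seq_num, day_obs) does not directly identify an
    # exposure; the butler must rewrite it to the canonical exposure key
    # using the matching dimension record.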

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

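    # File templates expand data ID keys and dimension record metadata
    # (e.g. {visit.name}) into datastore paths; a template that cannot
    # yield unique filenames must fail validation at put time.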

    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """This test does an export to a temporary directory and an import
        back into a new temporary directory repo. It does not assume a
        POSIX datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        print("Root:", exportButler.datastore.root)
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

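    # removeRuns with unstore=True deletes the underlying artifacts, while
    # unstore=False merely forgets them; the saved URIs distinguish the two
    # outcomes below.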

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # put a dataset in each
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # Should be nothing in registry for either one, and datastore should
        # not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testPathConstructor(self):
        """Independent test of constructor using PathLike."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object with the butler yaml
        path = pathlib.Path(self.tmpConfigFile)
        butler = Butler(path, writeable=False)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object without the butler yaml
        # (making sure we skip it if the tmp config doesn't end
        # in butler.yaml -- which is the case for a subclass)
        if self.tmpConfigFile.endswith("butler.yaml"):
            path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
            butler = Butler(path, writeable=False)
            self.assertIsInstance(butler, Butler)

1329 def testExportTransferCopy(self): 

1330 """Test local export using all transfer modes""" 

1331 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1332 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1333 # Test that the repo actually has at least one dataset. 

1334 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1335 self.assertGreater(len(datasets), 0) 

1336 uris = [exportButler.getURI(d) for d in datasets] 

1337 datastoreRoot = exportButler.datastore.root 

1338 

1339 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1340 

1341 for path in pathsInStore: 

1342 # Assume local file system 

1343 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1344 

1345 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1346 with safeTestTempDir(TESTDIR) as exportDir: 

1347 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1348 export.saveDatasets(datasets) 

1349 for path in pathsInStore: 

1350 self.assertTrue( 

1351 self.checkFileExists(exportDir, path), 

1352 f"Check that mode {transfer} exported files", 

1353 ) 

1354 

1355 def testPruneDatasets(self): 

1356 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1357 butler = Butler(self.tmpConfigFile, writeable=True) 

1358 # Load registry data with dimensions to hang datasets off of. 

1359 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1360 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1361 # Add some RUN-type collections. 

1362 run1 = "run1" 

1363 butler.registry.registerRun(run1) 

1364 run2 = "run2" 

1365 butler.registry.registerRun(run2) 

1366 # put some datasets. ref1 and ref2 have the same data ID, and are in 

1367 # different runs. ref3 has a different data ID. 

1368 metric = makeExampleMetrics() 

1369 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1370 datasetType = self.addDatasetType( 

1371 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1372 ) 

1373 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1374 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1375 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1376 

1377 # Simple prune. 

1378 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1379 with self.assertRaises(LookupError): 

1380 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1381 

1382 # Put data back. 

1383 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1384 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1385 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1386 

1387 # Check that in normal mode, deleting the record will lead to 

1388 # trash not touching the file. 

1389 uri1 = butler.datastore.getURI(ref1) 

1390 butler.datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1391 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 

1392 butler.datastore.trash(ref1) 

1393 butler.datastore.emptyTrash() 

1394 self.assertTrue(uri1.exists()) 

1395 uri1.remove() # Clean it up. 
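
# (With trust disabled, emptyTrash only deletes artifacts it can still
# find via datastore records; the record was deleted first, so the
# file is treated as unmanaged and left alone.)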

1396 

1397 # Simulate execution butler setup by deleting the datastore 

1398 # record but keeping the file around and trusting. 

1399 butler.datastore.trustGetRequest = True 

1400 uri2 = butler.datastore.getURI(ref2) 

1401 uri3 = butler.datastore.getURI(ref3) 

1402 self.assertTrue(uri2.exists()) 

1403 self.assertTrue(uri3.exists()) 

1404 

1405 # Remove the datastore record. 

1406 butler.datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1407 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1408 self.assertTrue(uri2.exists()) 

1409 butler.datastore.trash([ref2, ref3]) 

1410 # Immediate removal for ref2 file 

1411 self.assertFalse(uri2.exists()) 

1412 # But ref3 has to wait for the empty. 

1413 self.assertTrue(uri3.exists()) 

1414 butler.datastore.emptyTrash() 

1415 self.assertFalse(uri3.exists()) 
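
# (In trust mode the datastore acts on artifacts even without records:
# ref2, whose record was already deleted, is removed as soon as it is
# trashed, while ref3 still has a record and follows the normal
# trash-then-emptyTrash path.)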

1416 

1417 # Clear out the datasets from registry. 

1418 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1419 

1420 def testPytypePutCoercion(self): 

1421 """Test python type coercion on Butler.get and put.""" 

1422 

1423 # Store some data with the normal example storage class. 

1424 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1425 datasetTypeName = "test_metric" 

1426 butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName) 

1427 

1428 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1429 

1430 # Put a dict and this should coerce to a MetricsExample 

1431 test_dict = {"summary": {"a": 1}, "output": {"b": 2}} 

1432 metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424) 

1433 test_metric = butler.getDirect(metric_ref) 

1434 self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample") 

1435 self.assertEqual(test_metric.summary, test_dict["summary"]) 

1436 self.assertEqual(test_metric.output, test_dict["output"]) 

1437 

1438 # Check that the put still works if a DatasetType is given with 

1439 # a definition matching this python type. 

1440 registry_type = butler.registry.getDatasetType(datasetTypeName) 

1441 this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson") 

1442 metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425) 

1443 self.assertEqual(metric2_ref.datasetType, registry_type) 

1444 

1445 # The get will return the type expected by registry. 

1446 test_metric2 = butler.getDirect(metric2_ref) 

1447 self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample") 

1448 

1449 # Make a new DatasetRef with the compatible but different DatasetType. 

1450 # This should now return a dict. 

1451 new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run) 

1452 test_dict2 = butler.getDirect(new_ref) 

1453 self.assertEqual(get_full_type_name(test_dict2), "dict") 

1454 

1455 # Get it again with the wrong dataset type definition using get() 

1456 # rather than getDirect(). This should be consistent with getDirect() 

1457 # behavior and return the type of the DatasetType. 

1458 test_dict3 = butler.get(this_type, dataId=dataId, visit=425) 

1459 self.assertEqual(get_full_type_name(test_dict3), "dict") 

1460 

1461 def testPytypeCoercion(self): 

1462 """Test python type coercion on Butler.get and put.""" 

1463 

1464 # Store some data with the normal example storage class. 

1465 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1466 datasetTypeName = "test_metric" 

1467 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1468 

1469 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1470 metric = butler.get(datasetTypeName, dataId=dataId) 

1471 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1472 

1473 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1474 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1475 

1476 # Now need to hack the registry dataset type definition. 

1477 # There is no API for this. 

1478 manager = butler.registry._managers.datasets 

1479 manager._db.update( 

1480 manager._static.dataset_type, 

1481 {"name": datasetTypeName}, 

1482 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1483 ) 

1484 

1485 # Force reset of dataset type cache 

1486 butler.registry.refresh() 

1487 

1488 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1489 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1490 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1491 

1492 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1493 self.assertNotEqual(type(metric_model), type(metric)) 

1494 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1495 

1496 # Put the model and read it back to show that everything now 

1497 # works as normal. 

1498 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1499 metric_model_new = butler.get(metric_ref) 

1500 self.assertEqual(metric_model_new, metric_model) 

1501 

1502 # Hack the storage class again to something that will fail on the 

1503 # get with no conversion class. 

1504 manager._db.update( 

1505 manager._static.dataset_type, 

1506 {"name": datasetTypeName}, 

1507 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1508 ) 

1509 butler.registry.refresh() 

1510 

1511 with self.assertRaises(ValueError): 

1512 butler.get(datasetTypeName, dataId=dataId) 

1513 

1514 

1515@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1516class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1517 """PosixDatastore specialization of a butler using Postgres""" 

1518 

1519 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1520 fullConfigKey = ".datastore.formatters" 

1521 validationCanFail = True 

1522 datastoreStr = ["/tmp"] 

1523 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1524 registryStr = "PostgreSQL@test" 

1525 

1526 @staticmethod 

1527 def _handler(postgresql): 

1528 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1529 with engine.begin() as connection: 

1530 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 
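
# (btree_gist is needed because the registry schema uses GiST
# exclusion constraints that combine ordinary scalar columns with
# timespan ranges.)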

1531 

1532 @classmethod 

1533 def setUpClass(cls): 

1534 # Create the postgres test server. 

1535 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1536 cache_initialized_db=True, on_initialized=cls._handler 

1537 ) 

1538 super().setUpClass() 

1539 

1540 @classmethod 

1541 def tearDownClass(cls): 

1542 # Clean up any lingering SQLAlchemy engines/connections 

1543 # so they're closed before we shut down the server. 

1544 gc.collect() 

1545 cls.postgresql.clear_cache() 

1546 super().tearDownClass() 

1547 

1548 def setUp(self): 

1549 self.server = self.postgresql() 

1550 

1551 # Need to add a registry section to the config. 

1552 self._temp_config = False 

1553 config = Config(self.configFile) 

1554 config["registry", "db"] = self.server.url() 

1555 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1556 config.dump(fh) 

1557 self.configFile = fh.name 

1558 self._temp_config = True 

1559 super().setUp() 

1560 

1561 def tearDown(self): 

1562 self.server.stop() 

1563 if self._temp_config and os.path.exists(self.configFile): 

1564 os.remove(self.configFile) 

1565 super().tearDown() 

1566 

1567 def testMakeRepo(self): 

1568 # The base class test assumes that it is using SQLite and that 

1569 # the config file is acceptable to SQLite. 

1570 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1571 

1572 

1573class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1574 """InMemoryDatastore specialization of a butler""" 

1575 

1576 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1577 fullConfigKey = None 

1578 useTempRoot = False 

1579 validationCanFail = False 

1580 datastoreStr = ["datastore='InMemory"] 

1581 datastoreName = ["InMemoryDatastore@"] 

1582 registryStr = "/gen3.sqlite3" 

1583 

1584 def testIngest(self): 

1585 pass 

1586 

1587 

1588class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1589 """PosixDatastore specialization""" 

1590 

1591 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1592 fullConfigKey = ".datastore.datastores.1.formatters" 

1593 validationCanFail = True 

1594 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1595 datastoreName = [ 

1596 "InMemoryDatastore@", 

1597 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1598 "SecondDatastore", 

1599 ] 

1600 registryStr = "/gen3.sqlite3" 

1601 

1602 

1603class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1604 """Test that a yaml file in one location can refer to a root in another.""" 

1605 

1606 datastoreStr = ["dir1"] 

1607 # Disable the makeRepo test since we are deliberately not using 

1608 # butler.yaml as the config name. 

1609 fullConfigKey = None 

1610 

1611 def setUp(self): 

1612 self.root = makeTestTempDir(TESTDIR) 

1613 

1614 # Make a new repository in one place 

1615 self.dir1 = os.path.join(self.root, "dir1") 

1616 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1617 

1618 # Move the yaml file to a different place and add a "root" 

1619 self.dir2 = os.path.join(self.root, "dir2") 

1620 os.makedirs(self.dir2, exist_ok=True) 

1621 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1622 config = Config(configFile1) 

1623 config["root"] = self.dir1 

1624 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1625 config.dumpToUri(configFile2) 

1626 os.remove(configFile1) 

1627 self.tmpConfigFile = configFile2 

1628 

1629 def testFileLocations(self): 

1630 self.assertNotEqual(self.dir1, self.dir2) 

1631 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1632 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1633 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1634 

1635 

1636class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1637 """Test that a config file created by makeRepo outside of repo works.""" 

1638 

1639 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1640 

1641 def setUp(self): 

1642 self.root = makeTestTempDir(TESTDIR) 

1643 self.root2 = makeTestTempDir(TESTDIR) 

1644 

1645 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1646 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1647 

1648 def tearDown(self): 

1649 if os.path.exists(self.root2): 

1650 shutil.rmtree(self.root2, ignore_errors=True) 

1651 super().tearDown() 

1652 

1653 def testConfigExistence(self): 

1654 c = Config(self.tmpConfigFile) 

1655 uri_config = ResourcePath(c["root"]) 

1656 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1657 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1658 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1659 

1660 def testPutGet(self): 

1661 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1662 self.runPutGetTest(storageClass, "test_metric") 

1663 

1664 

1665class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1666 """Test that a config file created by makeRepo outside of repo works.""" 

1667 

1668 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1669 

1670 def setUp(self): 

1671 self.root = makeTestTempDir(TESTDIR) 

1672 self.root2 = makeTestTempDir(TESTDIR) 

1673 

1674 self.tmpConfigFile = self.root2 

1675 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1676 

1677 def testConfigExistence(self): 

1678 # Append the yaml file else Config constructor does not know the file 

1679 # type. 

1680 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1681 super().testConfigExistence() 

1682 

1683 

1684class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1685 """Test that a config file created by makeRepo outside of repo works.""" 

1686 

1687 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1688 

1689 def setUp(self): 

1690 self.root = makeTestTempDir(TESTDIR) 

1691 self.root2 = makeTestTempDir(TESTDIR) 

1692 

1693 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1694 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1695 

1696 

1697@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1698class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1699 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1700 a local in-memory SqlRegistry. 

1701 """ 

1702 

1703 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1704 fullConfigKey = None 

1705 validationCanFail = True 

1706 

1707 bucketName = "anybucketname" 

1708 """Name of the Bucket that will be used in the tests. The name is read from 

1709 the config file used with the tests during set-up. 

1710 """ 

1711 

1712 root = "butlerRoot/" 

1713 """Root repository directory expected to be used in case useTempRoot=False. 

1714 Otherwise the root is set to a 20 characters long randomly generated string 

1715 during set-up. 

1716 """ 

1717 

1718 datastoreStr = [f"datastore={root}"] 

1719 """Contains all expected root locations in a format expected to be 

1720 returned by Butler stringification. 

1721 """ 

1722 

1723 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1724 """The expected format of the S3 Datastore string.""" 

1725 

1726 registryStr = "/gen3.sqlite3" 

1727 """Expected format of the Registry string.""" 

1728 

1729 mock_s3 = mock_s3() 

1730 """The mocked s3 interface from moto.""" 

1731 

1732 def genRoot(self): 

1733 """Returns a random string of len 20 to serve as a root 

1734 name for the temporary bucket repo. 

1735 

1736 This is equivalent to tempfile.mkdtemp as this is what self.root 

1737 becomes when useTempRoot is True. 

1738 """ 

1739 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1740 return rndstr + "/" 

1741 

1742 def setUp(self): 

1743 config = Config(self.configFile) 

1744 uri = ResourcePath(config[".datastore.datastore.root"]) 

1745 self.bucketName = uri.netloc 

1746 

1747 # Enable S3 mocking of tests. 

1748 self.mock_s3.start() 

1749 

1750 # set up some fake credentials if they do not exist 

1751 self.usingDummyCredentials = setAwsEnvCredentials() 

1752 

1753 if self.useTempRoot: 

1754 self.root = self.genRoot() 

1755 rooturi = f"s3://{self.bucketName}/{self.root}" 

1756 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1757 

1758 # need local folder to store registry database 

1759 self.reg_dir = makeTestTempDir(TESTDIR) 

1760 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1761 

1762 # MOTO needs to know that we expect Bucket bucketname to exist 

1763 # (this used to be the class attribute bucketName) 

1764 s3 = boto3.resource("s3") 

1765 s3.create_bucket(Bucket=self.bucketName) 

1766 

1767 self.datastoreStr = f"datastore={self.root}" 

1768 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1769 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1770 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1771 

1772 def tearDown(self): 

1773 s3 = boto3.resource("s3") 

1774 bucket = s3.Bucket(self.bucketName) 

1775 try: 

1776 bucket.objects.all().delete() 

1777 except botocore.exceptions.ClientError as e: 

1778 if e.response["Error"]["Code"] == "404": 

1779 # the key was not reachable - pass 

1780 pass 

1781 else: 

1782 raise 

1783 

1784 bucket = s3.Bucket(self.bucketName) 

1785 bucket.delete() 

1786 

1787 # Stop the S3 mock. 

1788 self.mock_s3.stop() 

1789 

1790 # unset any potentially set dummy credentials 

1791 if self.usingDummyCredentials: 

1792 unsetAwsEnvCredentials() 

1793 

1794 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1795 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1796 

1797 if self.useTempRoot and os.path.exists(self.root): 

1798 shutil.rmtree(self.root, ignore_errors=True) 

1799 

1800 super().tearDown() 

1801 

1802 

1803@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!") 

1804class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1805 """WebdavDatastore specialization of a butler; a Webdav storage Datastore + 

1806 a local in-memory SqlRegistry. 

1807 """ 

1808 

1809 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml") 

1810 fullConfigKey = None 

1811 validationCanFail = True 

1812 

1813 serverName = "localhost" 

1814 """Name of the server that will be used in the tests. 

1815 """ 

1816 

1817 portNumber = 8080 

1818 """Port on which the webdav server listens. Automatically chosen 

1819 at setUpClass via the _getfreeport() method. 

1820 """ 

1821 

1822 root = "butlerRoot/" 

1823 """Root repository directory expected to be used in case useTempRoot=False. 

1824 Otherwise the root is set to a 20 characters long randomly generated string 

1825 during set-up. 

1826 """ 

1827 

1828 datastoreStr = [f"datastore={root}"] 

1829 """Contains all expected root locations in a format expected to be 

1830 returned by Butler stringification. 

1831 """ 

1832 

1833 datastoreName = ["FileDatastore@https://{serverName}/{root}"] 

1834 """The expected format of the WebdavDatastore string.""" 

1835 

1836 registryStr = "/gen3.sqlite3" 

1837 """Expected format of the Registry string.""" 

1838 

1839 serverThread = None 

1840 """Thread in which the local webdav server will run""" 

1841 

1842 stopWebdavServer = False 

1843 """This flag will cause the webdav server to 

1844 gracefully shut down when True 

1845 """ 

1846 

1847 def genRoot(self): 

1848 """Returns a random string of len 20 to serve as a root 

1849 name for the temporary bucket repo. 

1850 

1851 This is equivalent to tempfile.mkdtemp as this is what self.root 

1852 becomes when useTempRoot is True. 

1853 """ 

1854 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1855 return rndstr + "/" 

1856 

1857 @classmethod 

1858 def setUpClass(cls): 

1859 # Do the same as inherited class 

1860 cls.storageClassFactory = StorageClassFactory() 

1861 cls.storageClassFactory.addFromConfig(cls.configFile) 

1862 

1863 cls.portNumber = cls._getfreeport() 

1864 # Run a local webdav server on which tests will be run 

1865 cls.serverThread = Thread( 

1866 target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True 

1867 ) 

1868 cls.serverThread.start() 

1869 # Wait for it to start 

1870 time.sleep(3) 
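
# A more robust alternative to the fixed sleep would be to poll the
# endpoint until it responds, e.g. (sketch):
#
#     for _ in range(30):
#         if _is_webdav_endpoint(f"http://localhost:{cls.portNumber}/"):
#             break
#         time.sleep(0.1)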

1871 

1872 @classmethod 

1873 def tearDownClass(cls): 

1874 # Ask for graceful shut down of the webdav server 

1875 cls.stopWebdavServer = True 

1876 # Wait for the thread to exit 

1877 cls.serverThread.join() 

1878 super().tearDownClass() 

1879 

1880 def setUp(self): 

1881 config = Config(self.configFile) 

1882 

1883 if self.useTempRoot: 

1884 self.root = self.genRoot() 

1885 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}" 

1886 config.update({"datastore": {"datastore": {"root": self.rooturi}}}) 

1887 

1888 # need local folder to store registry database 

1889 self.reg_dir = makeTestTempDir(TESTDIR) 

1890 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1891 

1892 self.datastoreStr = f"datastore={self.root}" 

1893 self.datastoreName = [f"FileDatastore@{self.rooturi}"] 

1894 

1895 if not _is_webdav_endpoint(self.rooturi): 

1896 raise OSError("Webdav server not running properly: cannot run tests.") 

1897 

1898 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False) 

1899 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml") 

1900 

1901 def tearDown(self): 

1902 # Clear temporary directory 

1903 ResourcePath(self.rooturi).remove() 

1904 ResourcePath(self.rooturi).session.close() 

1905 

1906 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1907 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1908 

1909 if self.useTempRoot and os.path.exists(self.root): 

1910 shutil.rmtree(self.root, ignore_errors=True) 

1911 

1912 super().tearDown() 

1913 

1914 def _serveWebdav(self, port: int, stopWebdavServer): 

1915 """Starts a local webdav-compatible HTTP server, 

1916 Listening on http://localhost:port 

1917 This server only runs when this test class is instantiated, 

1918 and then shuts down. Must be started is a separate thread. 

1919 

1920 Parameters 

1921 ---------- 

1922 port : `int` 

1923 The port number on which the server should listen. 

stopWebdavServer : callable 

Callable that returns `True` when the server should shut down. 

1924 """ 

1925 root_path = gettempdir() 

1926 

1927 config = { 

1928 "host": "0.0.0.0", 

1929 "port": port, 

1930 "provider_mapping": {"/": root_path}, 

1931 "http_authenticator": {"domain_controller": None}, 

1932 "simple_dc": {"user_mapping": {"*": True}}, 

1933 "verbose": 0, 

1934 } 

1935 app = WsgiDAVApp(config) 

1936 

1937 server_args = { 

1938 "bind_addr": (config["host"], config["port"]), 

1939 "wsgi_app": app, 

1940 } 

1941 server = wsgi.Server(**server_args) 

1942 server.prepare() 

1943 

1944 try: 

1945 # Start the actual server in a separate thread 

1946 t = Thread(target=server.serve, daemon=True) 

1947 t.start() 

1948 # watch stopWebdavServer, and gracefully 

1949 # shut down the server when True 

1950 while True: 

1951 if stopWebdavServer(): 

1952 break 

1953 time.sleep(1) 

1954 except KeyboardInterrupt: 

1955 print("Caught Ctrl-C, shutting down...") 

1956 finally: 

1957 server.stop() 

1958 t.join() 

1959 

@staticmethod 

1960 def _getfreeport(): 

1961 """ 

1962 Determines a free port using sockets. 

1963 """ 

1964 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

1965 free_socket.bind(("127.0.0.1", 0)) 

1966 free_socket.listen() 

1967 port = free_socket.getsockname()[1] 

1968 free_socket.close() 

1969 return port 
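
# (There is an inherent race here: the port is free when probed but
# could be claimed by another process before the server binds to it.
# That is acceptable for a test helper.)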

1970 

1971 

1972class PosixDatastoreTransfers(unittest.TestCase): 

1973 """Test data transfers between butlers. 

1974 

1975 Test for different managers. UUID to UUID and integer to integer are 

1976 tested. UUID to integer is not supported since we do not currently 

1977 want to allow that. Integer to UUID is supported with the caveat 

1978 that UUID4 will be generated and this will be incorrect for raw 

1979 dataset types. The test ignores that. 

1980 """ 

1981 

1982 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1983 

1984 @classmethod 

1985 def setUpClass(cls): 

1986 cls.storageClassFactory = StorageClassFactory() 

1987 cls.storageClassFactory.addFromConfig(cls.configFile) 

1988 

1989 def setUp(self): 

1990 self.root = makeTestTempDir(TESTDIR) 

1991 self.config = Config(self.configFile) 

1992 

1993 def tearDown(self): 

1994 removeTestTempDir(self.root) 

1995 

1996 def create_butler(self, manager, label): 

1997 config = Config(self.configFile) 

1998 config["registry", "managers", "datasets"] = manager 

1999 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

2000 

2001 def create_butlers(self, manager1, manager2): 

2002 self.source_butler = self.create_butler(manager1, "1") 

2003 self.target_butler = self.create_butler(manager2, "2") 

2004 

2005 def testTransferUuidToUuid(self): 

2006 self.create_butlers( 

2007 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2008 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2009 ) 

2010 # Setting id_gen_map should have no effect here 

2011 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 
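
# (DATAID_TYPE would derive deterministic UUIDs from the dataset type
# and data ID; with a UUID-based source the original dataset IDs are
# preserved on transfer, so the map is ignored.)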

2012 

2013 def testTransferIntToInt(self): 

2014 with self.assertWarns(FutureWarning): 

2015 self.create_butlers( 

2016 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2017 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2018 ) 

2019 # int dataset ID only allows UNIQUE 

2020 self.assertButlerTransfers() 

2021 

2022 def testTransferIntToUuid(self): 

2023 with self.assertWarns(FutureWarning): 

2024 self.create_butlers( 

2025 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2026 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2027 ) 

2028 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

2029 

2030 def testTransferMissing(self): 

2031 """Test transfers where datastore records are missing. 

2032 

2033 This is how execution butler works. 

2034 """ 

2035 self.create_butlers( 

2036 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2037 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2038 ) 

2039 

2040 # Configure the source butler to allow trust. 

2041 self.source_butler.datastore.trustGetRequest = True 

2042 

2043 self.assertButlerTransfers(purge=True) 

2044 

2045 def testTransferMissingDisassembly(self): 

2046 """Test transfers where datastore records are missing. 

2047 

2048 This is how execution butler works. 

2049 """ 

2050 self.create_butlers( 

2051 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2052 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2053 ) 

2054 

2055 # Configure the source butler to allow trust. 

2056 self.source_butler.datastore.trustGetRequest = True 

2057 

2058 # Test disassembly. 

2059 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2060 

2061 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

2062 """Test that a run can be transferred to another butler.""" 

2063 

2064 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2065 datasetTypeName = "random_data" 

2066 

2067 # Test will create 3 collections and we will want to transfer 

2068 # two of those three. 

2069 runs = ["run1", "run2", "other"] 

2070 

2071 # Also want to use two different dataset types to ensure that 

2072 # grouping works. 

2073 datasetTypeNames = ["random_data", "random_data_2"] 

2074 

2075 # Create the run collections in the source butler. 

2076 for run in runs: 

2077 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2078 

2079 # Create dimensions in source butler. 

2080 n_exposures = 30 

2081 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2082 self.source_butler.registry.insertDimensionData( 

2083 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2084 ) 

2085 self.source_butler.registry.insertDimensionData( 

2086 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2087 ) 

2088 

2089 for i in range(n_exposures): 

2090 self.source_butler.registry.insertDimensionData( 

2091 "exposure", 

2092 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2093 ) 

2094 

2095 # Create dataset types in the source butler. 

2096 dimensions = self.source_butler.registry.dimensions.extract(["instrument", "exposure"]) 

2097 for datasetTypeName in datasetTypeNames: 

2098 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2099 self.source_butler.registry.registerDatasetType(datasetType) 

2100 

2101 # Write a dataset to an unrelated run -- this will ensure that 

2102 # we are rewriting integer dataset ids in the target if necessary. 

2103 # Will not be relevant for UUID. 

2104 run = "distraction" 

2105 butler = Butler(butler=self.source_butler, run=run) 

2106 butler.put( 

2107 makeExampleMetrics(), 

2108 datasetTypeName, 

2109 exposure=1, 

2110 instrument="DummyCamComp", 

2111 physical_filter="d-r", 

2112 ) 

2113 

2114 # Write some example metrics to the source 

2115 butler = Butler(butler=self.source_butler) 

2116 

2117 # Set of DatasetRefs that should be in the list of refs to transfer 

2118 # but which will not be transferred. 

2119 deleted = set() 

2120 

2121 n_expected = 20 # Number of datasets expected to be transferred 

2122 source_refs = [] 

2123 for i in range(n_exposures): 

2124 # Put a third of datasets into each collection, only retain 

2125 # two thirds. 

2126 index = i % 3 

2127 run = runs[index] 

2128 datasetTypeName = datasetTypeNames[i % 2] 

2129 

2130 metric_data = { 

2131 "summary": {"counter": i}, 

2132 "output": {"text": "metric"}, 

2133 "data": [2 * x for x in range(i)], 

2134 } 

2135 metric = MetricsExample(**metric_data) 

2136 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2137 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2138 

2139 # Remove the datastore record using low-level API 

2140 if purge: 

2141 # Remove records for a fraction. 

2142 if index == 1: 

2143 

2144 # For one of these delete the file as well. 

2145 # This allows the "missing" code to filter the 

2146 # file out. 

2147 if not deleted: 

2148 primary, uris = butler.datastore.getURIs(ref) 

2149 if primary: 

2150 primary.remove() 

2151 for uri in uris.values(): 

2152 uri.remove() 

2153 n_expected -= 1 

2154 deleted.add(ref) 

2155 

2156 # Remove the datastore record. 

2157 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

2158 

2159 if index < 2: 

2160 source_refs.append(ref) 

2161 if ref not in deleted: 

2162 new_metric = butler.get(ref.unresolved(), collections=run) 

2163 self.assertEqual(new_metric, metric) 

2164 

2165 # Create some bad dataset types to ensure we check for inconsistent 

2166 # definitions. 

2167 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2168 for datasetTypeName in datasetTypeNames: 

2169 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2170 self.target_butler.registry.registerDatasetType(datasetType) 

2171 with self.assertRaises(ConflictingDefinitionError) as cm: 

2172 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2173 self.assertIn("dataset type differs", str(cm.exception)) 

2174 

2175 # And remove the bad definitions. 

2176 for datasetTypeName in datasetTypeNames: 

2177 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2178 

2179 # Transfer without creating dataset types should fail. 

2180 with self.assertRaises(KeyError): 

2181 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2182 

2183 # Transfer without creating dimensions should fail. 

2184 with self.assertRaises(ConflictingDefinitionError) as cm: 

2185 self.target_butler.transfer_from( 

2186 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

2187 ) 

2188 self.assertIn("dimension", str(cm.exception)) 

2189 

2190 # The failed transfer above leaves registry in an inconsistent 

2191 # state because the run is created but then rolled back without 

2192 # the collection cache being cleared. For now force a refresh. 

2193 # Can remove with DM-35498. 

2194 self.target_butler.registry.refresh() 

2195 

2196 # Now transfer them to the second butler, including dimensions. 

2197 with self.assertLogs(level=logging.DEBUG) as cm: 

2198 transferred = self.target_butler.transfer_from( 

2199 self.source_butler, 

2200 source_refs, 

2201 id_gen_map=id_gen_map, 

2202 register_dataset_types=True, 

2203 transfer_dimensions=True, 

2204 ) 

2205 self.assertEqual(len(transferred), n_expected) 

2206 log_output = ";".join(cm.output) 

2207 self.assertIn("found in datastore for chunk", log_output) 

2208 self.assertIn("Creating output run", log_output) 

2209 

2210 # Do the transfer twice to ensure that it will do nothing extra. 

2211 # Only do this if purge=True because it does not work for int 

2212 # dataset_id. 

2213 if purge: 

2214 # This should not need to register dataset types. 

2215 transferred = self.target_butler.transfer_from( 

2216 self.source_butler, source_refs, id_gen_map=id_gen_map 

2217 ) 

2218 self.assertEqual(len(transferred), n_expected) 

2219 

2220 # Also do an explicit low-level transfer to trigger some 

2221 # edge cases. 

2222 with self.assertLogs(level=logging.DEBUG) as cm: 

2223 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2224 log_output = ";".join(cm.output) 

2225 self.assertIn("no file artifacts exist", log_output) 

2226 

2227 with self.assertRaises(TypeError): 

2228 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2229 

2230 with self.assertRaises(ValueError): 

2231 self.target_butler.datastore.transfer_from( 

2232 self.source_butler.datastore, source_refs, transfer="split" 

2233 ) 

2234 

2235 # Now try to get the same refs from the new butler. 

2236 for ref in source_refs: 

2237 if ref not in deleted: 

2238 unresolved_ref = ref.unresolved() 

2239 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2240 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2241 self.assertEqual(new_metric, old_metric) 

2242 

2243 # Now prune run2 collection and create instead a CHAINED collection. 

2244 # This should block the transfer. 

2245 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

2246 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2247 with self.assertRaises(CollectionTypeError): 

2248 # Re-importing the run1 datasets can be problematic if they 

2249 # use integer IDs so filter those out. 

2250 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2251 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 

2252 

2253 

2254 if __name__ == "__main__": 

2255 unittest.main()