Coverage for tests/test_butler.py: 14%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler.
"""

import gc
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import socket
import string
import tempfile
import time
import unittest
import unittest.mock
from tempfile import gettempdir
from threading import Thread

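# Optional test dependencies. Each guard below installs a harmless fallback
# (a no-op decorator or a None sentinel) so that the S3, PostgreSQL, and
# WebDAV test code further down this file can detect a missing package and
# skip the corresponding tests instead of failing at import time.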

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 can not be imported."""
        return cls


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql
except ImportError:
    testing = None


try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import _is_webdav_endpoint
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")

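# ButlerPutGetTests is deliberately not a unittest.TestCase: it is a mixin
# providing shared put/get machinery. Concrete subclasses are expected to
# supply attributes such as configFile and tmpConfigFile before these
# helpers run.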

class ButlerPutGetTests:
    """Helper class for running a suite of put/get tests from different
    butler configurations."""

    root = None
    default_run = "ingésτ😺"

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

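    # runPutGetTest is the shared workhorse for several test cases below.
    # It puts the same dataset three ways (as a DatasetRef, as a dataset
    # type name plus data ID, and as a DatasetType plus data ID), reads it
    # back through every get variant, optionally exercises artifact
    # retrieval, and finishes with parameter handling and error conditions.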

    def runPutGetTest(self, storageClass, datasetTypeName):
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # and deferred direct with ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # when path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True)
            # Lookup with original args should still fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should still fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have a collection
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler

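    # This test exercises a butler constructed without a default run:
    # every put must name a run explicitly and every lookup must name its
    # collections, otherwise a CollectionError is raised.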

    def testDeferredCollectionPassing(self):
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should make it findable
        # in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))

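# ButlerTests is still abstract with respect to the backing datastore and
# registry: concrete subclasses supply attributes referenced below, such as
# configFile, fullConfigKey, validationCanFail, datastoreStr, datastoreName,
# and registryStr.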

class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

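    # The next three tests drive runPutGetTest with different storage
    # classes: a simple storage class with no components, a composite that
    # must not be disassembled, and a composite that a file datastore is
    # expected to disassemble into per-component artifacts.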

    def testBasicPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")


    def testStorageClassOverrideGet(self):
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.getDirect(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDirectDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDirectDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )


    def testPytypePutCoercion(self):
        """Test python type coercion on Butler.get and put."""

        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.getDirect(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.getDirect(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.getDirect(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the wrong dataset type definition using get()
        # rather than getDirect(). This should be consistent with getDirect()
        # behavior and return the type of the DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

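    # testIngest exercises two ingest modes: a per-file "copy" transfer
    # where each detector has its own file, and a single-file "move"
    # transfer where one file backs multiple DatasetRefs via
    # MultiDetectorFormatter, after which the source file must be gone and
    # both refs must resolve to the same URI.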

    def testIngest(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            butler.ingest(*datasets, transfer="move", record_validation_info=False)
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory can not ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

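    # testPruneCollections walks through the deletion rules for each
    # collection type: RUN collections require purge=True together with
    # unstore=True, TAGGED and CHAINED collections refuse purge entirely,
    # and a RUN that is still referenced by a CHAINED collection cannot be
    # deleted until the chain itself has been removed.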

    def testPruneCollections(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type
        butler.registry.removeDatasetType(datasetType.name)


    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

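    # testGetDatasetTypes checks that component dataset types are not
    # registered explicitly but can be reconstructed from their parents
    # via makeAllComponentDatasetTypes(), then reuses the registered types
    # to exercise validateConfiguration().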

    def testGetDatasetTypes(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for components, but querying can still return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

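    # testTransaction verifies that raising inside butler.transaction()
    # rolls back everything performed within the block: the inserted
    # dimension data, the registry entry for the dataset, and the file
    # written to the datastore.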

    def testTransaction(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)


    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)


    def testStringification(self):
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Checks if file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()


    def testPutTemplates(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(
            self.checkFileExists(
                butler.datastore.root, f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle"
            ),
            f"Checking existence of {uri}",
        )

        # Check the template based on dimensions
        butler.datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

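    # The import/export round trip below writes an export file plus
    # artifacts to one temporary directory and imports them into a fresh
    # posix repo in another; the virtual-composite variant is currently
    # marked as an expected failure.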

1310 def testImportExport(self): 

1311 # Run put/get tests just to create and populate a repo. 

1312 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1313 self.runImportExportTest(storageClass) 

1314 

1315 @unittest.expectedFailure 

1316 def testImportExportVirtualComposite(self): 

1317 # Run put/get tests just to create and populate a repo. 

1318 storageClass = self.storageClassFactory.getStorageClass("StructuredComposite") 

1319 self.runImportExportTest(storageClass) 

1320 

1321 def runImportExportTest(self, storageClass): 

1322 """This test does an export to a temp directory and an import back 

1323 into a new temp directory repo. It does not assume a posix datastore""" 

1324 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1325 print("Root:", exportButler.datastore.root) 

1326 # Test that the repo actually has at least one dataset. 

1327 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1328 self.assertGreater(len(datasets), 0) 

1329 # Add a DimensionRecord that's unused by those datasets. 

1330 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")} 

1331 exportButler.registry.insertDimensionData("skymap", skymapRecord) 

1332 # Export and then import datasets. 

1333 with safeTestTempDir(TESTDIR) as exportDir: 

1334 exportFile = os.path.join(exportDir, "exports.yaml") 

1335 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export: 

1336 export.saveDatasets(datasets) 

1337 # Export the same datasets again. This should quietly do 

1338 # nothing because of internal deduplication, and it shouldn't 

1339 # complain about being asked to export the "htm7" elements even 

1340 # though there aren't any in these datasets or in the database. 

1341 export.saveDatasets(datasets, elements=["htm7"]) 

1342 # Save one of the data IDs again; this should be harmless 

1343 # because of internal deduplication. 

1344 export.saveDataIds([datasets[0].dataId]) 

1345 # Save some dimension records directly. 

1346 export.saveDimensionData("skymap", [skymapRecord]) 

1347 self.assertTrue(os.path.exists(exportFile)) 

1348 with safeTestTempDir(TESTDIR) as importDir: 

1349 # We always want this to be a local posix butler 

1350 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml"))) 

1351 # Calling script.butlerImport tests the implementation of the 

1352 # butler command line interface "import" subcommand. Functions 

1353 # in the script folder are generally considered protected and 

1354 # should not be used as public api. 

1355 with open(exportFile, "r") as f: 

1356 script.butlerImport( 

1357 importDir, 

1358 export_file=f, 

1359 directory=exportDir, 

1360 transfer="auto", 

1361 skip_dimensions=None, 

1362 reuse_ids=False, 

1363 ) 

1364 importButler = Butler(importDir, run=self.default_run) 

1365 for ref in datasets: 

1366 with self.subTest(ref=ref): 

1367 # Test for existence by passing in the DatasetType and 

1368 # data ID separately, to avoid lookup by dataset_id. 

1369 self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId)) 

1370 self.assertEqual( 

1371 list(importButler.registry.queryDimensionRecords("skymap")), 

1372 [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)], 

1373 ) 

1374 

1375 def testRemoveRuns(self): 

1376 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1377 butler = Butler(self.tmpConfigFile, writeable=True) 

1378 # Load registry data with dimensions to hang datasets off of. 

1379 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1380 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1381 # Add some RUN-type collections. 

1382 run1 = "run1" 

1383 butler.registry.registerRun(run1) 

1384 run2 = "run2" 

1385 butler.registry.registerRun(run2) 

1386 # Put a dataset in each run. 

1387 metric = makeExampleMetrics() 

1388 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1389 datasetType = self.addDatasetType( 

1390 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1391 ) 

1392 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1393 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1394 uri1 = butler.getURI(ref1, collections=[run1]) 

1395 uri2 = butler.getURI(ref2, collections=[run2]) 

1396 # Remove from both runs with different values for unstore. 

1397 butler.removeRuns([run1], unstore=True) 

1398 butler.removeRuns([run2], unstore=False) 

1399 # Should be nothing in registry for either one, and datastore should 

1400 # not think either exists. 

1401 with self.assertRaises(MissingCollectionError): 

1402 butler.registry.getCollectionType(run1) 

1403 with self.assertRaises(MissingCollectionError): 

1404 butler.registry.getCollectionType(run2) 

1405 self.assertFalse(butler.datastore.exists(ref1)) 

1406 self.assertFalse(butler.datastore.exists(ref2)) 

1407 # The ref we unstored should be gone according to the URI, but the 

1408 # one we forgot should still be around. 

1409 self.assertFalse(uri1.exists()) 

1410 self.assertTrue(uri2.exists()) 

1411 

1412 

1413class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1414 """PosixDatastore specialization of a butler""" 

1415 

1416 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1417 fullConfigKey = ".datastore.formatters" 

1418 validationCanFail = True 

1419 datastoreStr = ["/tmp"] 

1420 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1421 registryStr = "/gen3.sqlite3" 

1422 

1423 def testPathConstructor(self): 

1424 """Independent test of constructor using PathLike.""" 

1425 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1426 self.assertIsInstance(butler, Butler) 

1427 

1428 # And again with a Path object with the butler yaml 

1429 path = pathlib.Path(self.tmpConfigFile) 

1430 butler = Butler(path, writeable=False) 

1431 self.assertIsInstance(butler, Butler) 

1432 

1433 # And again with a Path object without the butler yaml 

1434 # (making sure we skip it if the tmp config doesn't end 

1435 # in butler.yaml -- which is the case for a subclass) 

1436 if self.tmpConfigFile.endswith("butler.yaml"): 

1437 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1438 butler = Butler(path, writeable=False) 

1439 self.assertIsInstance(butler, Butler) 

1440 

1441 def testExportTransferCopy(self): 

1442 """Test local export using all transfer modes""" 

1443 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1444 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1445 # Test that the repo actually has at least one dataset. 

1446 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1447 self.assertGreater(len(datasets), 0) 

1448 uris = [exportButler.getURI(d) for d in datasets] 

1449 datastoreRoot = exportButler.datastore.root 

1450 

1451 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1452 

1453 for path in pathsInStore: 

1454 # Assume local file system 

1455 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1456 

1457 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1458 with safeTestTempDir(TESTDIR) as exportDir: 

1459 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1460 export.saveDatasets(datasets) 

1461 for path in pathsInStore: 

1462 self.assertTrue( 

1463 self.checkFileExists(exportDir, path), 

1464 f"Check that mode {transfer} exported files", 

1465 ) 

1466 

1467 def testPruneDatasets(self): 

1468 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1469 butler = Butler(self.tmpConfigFile, writeable=True) 

1470 # Load registry data with dimensions to hang datasets off of. 

1471 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1472 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1473 # Add some RUN-type collections. 

1474 run1 = "run1" 

1475 butler.registry.registerRun(run1) 

1476 run2 = "run2" 

1477 butler.registry.registerRun(run2) 

1478 # put some datasets. ref1 and ref2 have the same data ID, and are in 

1479 # different runs. ref3 has a different data ID. 

1480 metric = makeExampleMetrics() 

1481 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1482 datasetType = self.addDatasetType( 

1483 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1484 ) 

1485 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1486 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1487 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1488 

1489 # Simple prune. 

1490 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1491 with self.assertRaises(LookupError): 

1492 butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1) 

1493 

1494 # Put data back. 

1495 ref1 = butler.put(metric, ref1.unresolved(), run=run1) 

1496 ref2 = butler.put(metric, ref2.unresolved(), run=run2) 

1497 ref3 = butler.put(metric, ref3.unresolved(), run=run1) 

1498 

1499 # Check that in normal mode, deleting the record will lead to 

1500 # trash not touching the file. 

1501 uri1 = butler.datastore.getURI(ref1) 

1502 butler.datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1503 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id}) 
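        # (Hedged editorial note: _table appears to be the datastore's
        # internal opaque table of file records; deleting the row by hand
        # simulates a registry/datastore mismatch without going through
        # any public API.)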

1504 butler.datastore.trash(ref1) 

1505 butler.datastore.emptyTrash() 

1506 self.assertTrue(uri1.exists()) 

1507 uri1.remove() # Clean it up. 

1508 

1509 # Simulate execution butler setup by deleting the datastore 

1510 # record but keeping the file around and trusting. 

1511 butler.datastore.trustGetRequest = True 

1512 uri2 = butler.datastore.getURI(ref2) 

1513 uri3 = butler.datastore.getURI(ref3) 

1514 self.assertTrue(uri2.exists()) 

1515 self.assertTrue(uri3.exists()) 

1516 

1517 # Remove the datastore record. 

1518 butler.datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1519 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id}) 

1520 self.assertTrue(uri2.exists()) 

1521 butler.datastore.trash([ref2, ref3]) 

1522 # Immediate removal for ref2 file 

1523 self.assertFalse(uri2.exists()) 

1524 # But ref3 has to wait for emptyTrash(). 

1525 self.assertTrue(uri3.exists()) 

1526 butler.datastore.emptyTrash() 

1527 self.assertFalse(uri3.exists()) 

1528 

1529 # Clear out the datasets from registry. 

1530 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1531 

1532 def testPytypeCoercion(self): 

1533 """Test python type coercion on Butler.get and put.""" 

1534 

1535 # Store some data with the normal example storage class. 

1536 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1537 datasetTypeName = "test_metric" 

1538 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1539 

1540 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1541 metric = butler.get(datasetTypeName, dataId=dataId) 

1542 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1543 

1544 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1545 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1546 

1547 # Now need to hack the registry dataset type definition. 

1548 # There is no API for this. 
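        # (Hedged editorial note: in this low-level Database.update call the
        # second argument maps column names to *keys* in the row dictionary
        # that follows, which is why the row below is keyed by the dataset
        # type name itself rather than by "name".)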

1549 manager = butler.registry._managers.datasets 

1550 manager._db.update( 

1551 manager._static.dataset_type, 

1552 {"name": datasetTypeName}, 

1553 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1554 ) 

1555 

1556 # Force reset of dataset type cache 

1557 butler.registry.refresh() 

1558 

1559 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1560 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1561 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1562 

1563 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1564 self.assertNotEqual(type(metric_model), type(metric)) 

1565 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1566 

1567 # Put the model and read it back to show that everything now 

1568 # works as normal. 

1569 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1570 metric_model_new = butler.get(metric_ref) 

1571 self.assertEqual(metric_model_new, metric_model) 

1572 

1573 # Hack the storage class again to something that will fail on the 

1574 # get with no conversion class. 

1575 manager._db.update( 

1576 manager._static.dataset_type, 

1577 {"name": datasetTypeName}, 

1578 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1579 ) 

1580 butler.registry.refresh() 

1581 

1582 with self.assertRaises(ValueError): 

1583 butler.get(datasetTypeName, dataId=dataId) 

1584 

1585 

1586@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1587class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1588 """PosixDatastore specialization of a butler using Postgres""" 

1589 

1590 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1591 fullConfigKey = ".datastore.formatters" 

1592 validationCanFail = True 

1593 datastoreStr = ["/tmp"] 

1594 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1595 registryStr = "PostgreSQL@test" 

1596 

1597 @staticmethod 

1598 def _handler(postgresql): 

1599 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1600 with engine.begin() as connection: 

1601 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 
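        # (Hedged editorial note: the registry's temporal exclusion
        # constraints are believed to rely on the btree_gist extension,
        # hence installing it once in the cached template database.)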

1602 

1603 @classmethod 

1604 def setUpClass(cls): 

1605 # Create the postgres test server. 

1606 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1607 cache_initialized_db=True, on_initialized=cls._handler 

1608 ) 

1609 super().setUpClass() 

1610 

1611 @classmethod 

1612 def tearDownClass(cls): 

1613 # Clean up any lingering SQLAlchemy engines/connections 

1614 # so they're closed before we shut down the server. 

1615 gc.collect() 

1616 cls.postgresql.clear_cache() 

1617 super().tearDownClass() 

1618 

1619 def setUp(self): 

1620 self.server = self.postgresql() 

1621 

1622 # Need to add a registry section to the config. 

1623 self._temp_config = False 

1624 config = Config(self.configFile) 

1625 config["registry", "db"] = self.server.url() 

1626 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1627 config.dump(fh) 

1628 self.configFile = fh.name 

1629 self._temp_config = True 

1630 super().setUp() 
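    # Editorial sketch (an assumption, not part of the original suite):
    # Config behaves like a nested mapping, and both the tuple-key access
    # used in setUp above and the dotted-key form used elsewhere in this
    # file address the same hierarchy.
    def _sketchConfigAccess(self):
        config = Config({"registry": {"db": "sqlite://"}})
        self.assertEqual(config["registry", "db"], "sqlite://")
        self.assertEqual(config[".registry.db"], "sqlite://")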

1631 

1632 def tearDown(self): 

1633 self.server.stop() 

1634 if self._temp_config and os.path.exists(self.configFile): 

1635 os.remove(self.configFile) 

1636 super().tearDown() 

1637 

1638 def testMakeRepo(self): 

1639 # The base class test assumes that it's using sqlite and assumes 

1640 # the config file is acceptable to sqlite. 

1641 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1642 

1643 

1644class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1645 """InMemoryDatastore specialization of a butler""" 

1646 

1647 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1648 fullConfigKey = None 

1649 useTempRoot = False 

1650 validationCanFail = False 

1651 datastoreStr = ["datastore='InMemory"] 

1652 datastoreName = ["InMemoryDatastore@"] 

1653 registryStr = "/gen3.sqlite3" 

1654 

1655 def testIngest(self): 
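        # (Hedged editorial note: file ingest is presumably not meaningful
        # for an in-memory datastore, so the inherited test is reduced to a
        # no-op here.)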

1656 pass 

1657 

1658 

1659class ChainedDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1660 """PosixDatastore specialization""" 

1661 

1662 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1663 fullConfigKey = ".datastore.datastores.1.formatters" 

1664 validationCanFail = True 

1665 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1666 datastoreName = [ 

1667 "InMemoryDatastore@", 

1668 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1669 "SecondDatastore", 

1670 ] 

1671 registryStr = "/gen3.sqlite3" 
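    # Editorial sketch (an approximation, not the actual test config): the
    # chained configuration in config/basic/butler-chained.yaml is expected
    # to look roughly like
    #
    #     datastore:
    #       cls: lsst.daf.butler.datastores.chainedDatastore.ChainedDatastore
    #       datastores:
    #         - cls: lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore
    #         - cls: lsst.daf.butler.datastores.fileDatastore.FileDatastore
    #           root: <butlerRoot>/FileDatastore_1
    #
    # which is what the datastoreName expectations above encode.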

1672 

1673 

1674class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1675 """Test that a yaml file in one location can refer to a root in another.""" 

1676 

1677 datastoreStr = ["dir1"] 

1678 # Disable the makeRepo test since we are deliberately not using 

1679 # butler.yaml as the config name. 

1680 fullConfigKey = None 

1681 

1682 def setUp(self): 

1683 self.root = makeTestTempDir(TESTDIR) 

1684 

1685 # Make a new repository in one place 

1686 self.dir1 = os.path.join(self.root, "dir1") 

1687 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1688 

1689 # Move the yaml file to a different place and add a "root" 

1690 self.dir2 = os.path.join(self.root, "dir2") 

1691 os.makedirs(self.dir2, exist_ok=True) 

1692 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1693 config = Config(configFile1) 

1694 config["root"] = self.dir1 

1695 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1696 config.dumpToUri(configFile2) 

1697 os.remove(configFile1) 

1698 self.tmpConfigFile = configFile2 

1699 

1700 def testFileLocations(self): 

1701 self.assertNotEqual(self.dir1, self.dir2) 

1702 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1703 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1704 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1705 

1706 

1707class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1708 """Test that a config file created by makeRepo outside of repo works.""" 

1709 

1710 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1711 

1712 def setUp(self): 

1713 self.root = makeTestTempDir(TESTDIR) 

1714 self.root2 = makeTestTempDir(TESTDIR) 

1715 

1716 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1717 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1718 

1719 def tearDown(self): 

1720 if os.path.exists(self.root2): 

1721 shutil.rmtree(self.root2, ignore_errors=True) 

1722 super().tearDown() 

1723 

1724 def testConfigExistence(self): 

1725 c = Config(self.tmpConfigFile) 

1726 uri_config = ResourcePath(c["root"]) 

1727 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1728 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1729 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1730 

1731 def testPutGet(self): 

1732 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1733 self.runPutGetTest(storageClass, "test_metric") 

1734 

1735 

1736class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1737 """Test that a config file created by makeRepo outside of repo works.""" 

1738 

1739 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1740 

1741 def setUp(self): 

1742 self.root = makeTestTempDir(TESTDIR) 

1743 self.root2 = makeTestTempDir(TESTDIR) 

1744 

1745 self.tmpConfigFile = self.root2 

1746 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1747 

1748 def testConfigExistence(self): 

1749 # Append the yaml file else Config constructor does not know the file 

1750 # type. 

1751 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1752 super().testConfigExistence() 

1753 

1754 

1755class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1756 """Test that a config file created by makeRepo outside of repo works.""" 

1757 

1758 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1759 

1760 def setUp(self): 

1761 self.root = makeTestTempDir(TESTDIR) 

1762 self.root2 = makeTestTempDir(TESTDIR) 

1763 

1764 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1765 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1766 

1767 

1768@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1769class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1770 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1771 a local SQLite registry. 

1772 """ 

1773 

1774 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1775 fullConfigKey = None 

1776 validationCanFail = True 

1777 

1778 bucketName = "anybucketname" 

1779 """Name of the Bucket that will be used in the tests. The name is read from 

1780 the config file used with the tests during set-up. 

1781 """ 

1782 

1783 root = "butlerRoot/" 

1784 """Root repository directory expected to be used in case useTempRoot=False. 

1785 Otherwise the root is set to a randomly generated 20-character string 

1786 during set-up. 

1787 """ 

1788 

1789 datastoreStr = [f"datastore={root}"] 

1790 """Contains all expected root locations in a format expected to be 

1791 returned by Butler stringification. 

1792 """ 

1793 

1794 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1795 """The expected format of the S3 Datastore string.""" 

1796 

1797 registryStr = "/gen3.sqlite3" 

1798 """Expected format of the Registry string.""" 

1799 

1800 mock_s3 = mock_s3() 

1801 """The mocked s3 interface from moto.""" 

1802 

1803 def genRoot(self): 

1804 """Returns a random string of len 20 to serve as a root 

1805 name for the temporary bucket repo. 

1806 

1807 This is equivalent to tempfile.mkdtemp as this is what self.root 

1808 becomes when useTempRoot is True. 

1809 """ 

1810 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1811 return rndstr + "/" 

1812 

1813 def setUp(self): 

1814 config = Config(self.configFile) 

1815 uri = ResourcePath(config[".datastore.datastore.root"]) 

1816 self.bucketName = uri.netloc 

1817 

1818 # Enable S3 mocking of tests. 

1819 self.mock_s3.start() 

1820 

1821 # set up some fake credentials if they do not exist 

1822 self.usingDummyCredentials = setAwsEnvCredentials() 

1823 

1824 if self.useTempRoot: 

1825 self.root = self.genRoot() 

1826 rooturi = f"s3://{self.bucketName}/{self.root}" 

1827 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1828 

1829 # need local folder to store registry database 

1830 self.reg_dir = makeTestTempDir(TESTDIR) 

1831 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1832 

1833 # MOTO needs to know that we expect Bucket bucketname to exist 

1834 # (this used to be the class attribute bucketName) 

1835 s3 = boto3.resource("s3") 

1836 s3.create_bucket(Bucket=self.bucketName) 

1837 

1838 self.datastoreStr = f"datastore={self.root}" 

1839 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1840 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1841 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1842 

1843 def tearDown(self): 

1844 s3 = boto3.resource("s3") 

1845 bucket = s3.Bucket(self.bucketName) 

1846 try: 

1847 bucket.objects.all().delete() 

1848 except botocore.exceptions.ClientError as e: 

1849 if e.response["Error"]["Code"] == "404": 

1850 # the key was not reachable - pass 

1851 pass 

1852 else: 

1853 raise 

1854 

1855 bucket = s3.Bucket(self.bucketName) 

1856 bucket.delete() 

1857 

1858 # Stop the S3 mock. 

1859 self.mock_s3.stop() 

1860 

1861 # unset any potentially set dummy credentials 

1862 if self.usingDummyCredentials: 

1863 unsetAwsEnvCredentials() 

1864 

1865 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1866 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1867 

1868 if self.useTempRoot and os.path.exists(self.root): 

1869 shutil.rmtree(self.root, ignore_errors=True) 

1870 

1871 super().tearDown() 

1872 

1873 

1874@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!") 

1875class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1876 """WebdavDatastore specialization of a butler; a Webdav storage Datastore + 

1877 a local SQLite registry. 

1878 """ 

1879 

1880 configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml") 

1881 fullConfigKey = None 

1882 validationCanFail = True 

1883 

1884 serverName = "localhost" 

1885 """Name of the server that will be used in the tests. 

1886 """ 

1887 

1888 portNumber = 8080 

1889 """Port on which the webdav server listens. Automatically chosen 

1890 at setUpClass via the _getfreeport() method. 

1891 """ 

1892 

1893 root = "butlerRoot/" 

1894 """Root repository directory expected to be used in case useTempRoot=False. 

1895 Otherwise the root is set to a randomly generated 20-character string 

1896 during set-up. 

1897 """ 

1898 

1899 datastoreStr = [f"datastore={root}"] 

1900 """Contains all expected root locations in a format expected to be 

1901 returned by Butler stringification. 

1902 """ 

1903 

1904 datastoreName = ["FileDatastore@https://{serverName}/{root}"] 

1905 """The expected format of the WebdavDatastore string.""" 

1906 

1907 registryStr = "/gen3.sqlite3" 

1908 """Expected format of the Registry string.""" 

1909 

1910 serverThread = None 

1911 """Thread in which the local webdav server will run""" 

1912 

1913 stopWebdavServer = False 

1914 """This flag will cause the webdav server to 

1915 gracefully shut down when True 

1916 """ 

1917 

1918 def genRoot(self): 

1919 """Returns a random string of len 20 to serve as a root 

1920 name for the temporary bucket repo. 

1921 

1922 This is equivalent to tempfile.mkdtemp as this is what self.root 

1923 becomes when useTempRoot is True. 

1924 """ 

1925 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1926 return rndstr + "/" 

1927 

1928 @classmethod 

1929 def setUpClass(cls): 

1930 # Do the same as inherited class 

1931 cls.storageClassFactory = StorageClassFactory() 

1932 cls.storageClassFactory.addFromConfig(cls.configFile) 

1933 

1934 cls.portNumber = cls._getfreeport() 

1935 # Run a local webdav server on which tests will be run 

1936 cls.serverThread = Thread( 

1937 target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True 

1938 ) 

1939 cls.serverThread.start() 

1940 # Wait for it to start 

1941 time.sleep(3) 
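        # (Hedged editorial note: the fixed sleep is a crude readiness wait;
        # setUp additionally verifies the endpoint with _is_webdav_endpoint
        # before any test runs against it.)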

1942 

1943 @classmethod 

1944 def tearDownClass(cls): 

1945 # Ask for graceful shut down of the webdav server 

1946 cls.stopWebdavServer = True 

1947 # Wait for the thread to exit 

1948 cls.serverThread.join() 

1949 super().tearDownClass() 

1950 

1951 def setUp(self): 

1952 config = Config(self.configFile) 

1953 

1954 if self.useTempRoot: 

1955 self.root = self.genRoot() 

1956 self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}" 

1957 config.update({"datastore": {"datastore": {"root": self.rooturi}}}) 

1958 

1959 # need local folder to store registry database 

1960 self.reg_dir = makeTestTempDir(TESTDIR) 

1961 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1962 

1963 self.datastoreStr = f"datastore={self.root}" 

1964 self.datastoreName = [f"FileDatastore@{self.rooturi}"] 

1965 

1966 if not _is_webdav_endpoint(self.rooturi): 

1967 raise OSError("Webdav server not running properly: cannot run tests.") 

1968 

1969 Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False) 

1970 self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml") 

1971 

1972 def tearDown(self): 

1973 # Clear temporary directory 

1974 ResourcePath(self.rooturi).remove() 

1975 ResourcePath(self.rooturi).session.close() 

1976 

1977 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1978 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1979 

1980 if self.useTempRoot and os.path.exists(self.root): 

1981 shutil.rmtree(self.root, ignore_errors=True) 

1982 

1983 super().tearDown() 

1984 

1985 def _serveWebdav(self, port: int, stopWebdavServer): 

1986 """Starts a local webdav-compatible HTTP server, 

1987 Listening on http://localhost:port 

1988 This server only runs when this test class is instantiated, 

1989 and then shuts down. Must be started is a separate thread. 

1990 

1991 Parameters 

1992 ---------- 

1993 port : `int` 

1994 The port number on which the server should listen 

1995 """ 

1996 root_path = gettempdir() 

1997 

1998 config = { 

1999 "host": "0.0.0.0", 

2000 "port": port, 

2001 "provider_mapping": {"/": root_path}, 

2002 "http_authenticator": {"domain_controller": None}, 

2003 "simple_dc": {"user_mapping": {"*": True}}, 

2004 "verbose": 0, 

2005 } 

2006 app = WsgiDAVApp(config) 

2007 

2008 server_args = { 

2009 "bind_addr": (config["host"], config["port"]), 

2010 "wsgi_app": app, 

2011 } 

2012 server = wsgi.Server(**server_args) 

2013 server.prepare() 

2014 

2015 try: 

2016 # Start the actual server in a separate thread 

2017 t = Thread(target=server.serve, daemon=True) 

2018 t.start() 

2019 # watch stopWebdavServer, and gracefully 

2020 # shut down the server when True 

2021 while True: 

2022 if stopWebdavServer(): 

2023 break 

2024 time.sleep(1) 

2025 except KeyboardInterrupt: 

2026 print("Caught Ctrl-C, shutting down...") 

2027 finally: 

2028 server.stop() 

2029 t.join() 

2030 

@staticmethod 

2031 def _getfreeport(): 

2032 """ 

2033 Determines a free port using sockets. 

2034 """ 

2035 free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

2036 free_socket.bind(("127.0.0.1", 0)) 

2037 free_socket.listen() 

2038 port = free_socket.getsockname()[1] 

2039 free_socket.close() 
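        # (Hedged editorial note: there is an inherent race here -- another
        # process may claim the port between close() and the server's own
        # bind() -- which is usually acceptable for a test fixture.)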

2040 return port 

2041 

2042 

2043class PosixDatastoreTransfers(unittest.TestCase): 

2044 """Test data transfers between butlers. 

2045 

2046 Test for different managers. UUID to UUID and integer to integer are 

2047 tested. UUID to integer is not supported since we do not currently 

2048 want to allow that. Integer to UUID is supported with the caveat 

2049 that UUID4 will be generated and this will be incorrect for raw 

2050 dataset types. The test ignores that. 

2051 """ 

2052 

2053 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2054 

2055 @classmethod 

2056 def setUpClass(cls): 

2057 cls.storageClassFactory = StorageClassFactory() 

2058 cls.storageClassFactory.addFromConfig(cls.configFile) 

2059 

2060 def setUp(self): 

2061 self.root = makeTestTempDir(TESTDIR) 

2062 self.config = Config(self.configFile) 

2063 

2064 def tearDown(self): 

2065 removeTestTempDir(self.root) 

2066 

2067 def create_butler(self, manager, label): 

2068 config = Config(self.configFile) 

2069 config["registry", "managers", "datasets"] = manager 

2070 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

2071 

2072 def create_butlers(self, manager1, manager2): 

2073 self.source_butler = self.create_butler(manager1, "1") 

2074 self.target_butler = self.create_butler(manager2, "2") 

2075 

2076 def testTransferUuidToUuid(self): 

2077 self.create_butlers( 

2078 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2079 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2080 ) 

2081 # Setting id_gen_map should have no effect here 

2082 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

2083 

2084 def testTransferIntToInt(self): 

2085 with self.assertWarns(FutureWarning): 

2086 self.create_butlers( 

2087 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2088 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2089 ) 

2090 # int dataset ID only allows UNIQUE 

2091 self.assertButlerTransfers() 

2092 

2093 def testTransferIntToUuid(self): 

2094 with self.assertWarns(FutureWarning): 

2095 self.create_butlers( 

2096 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager", 

2097 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2098 ) 

2099 self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE}) 

2100 

2101 def testTransferMissing(self): 

2102 """Test transfers where datastore records are missing. 

2103 

2104 This is how execution butler works. 

2105 """ 

2106 self.create_butlers( 

2107 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2108 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2109 ) 

2110 

2111 # Configure the source butler to allow trust. 

2112 self.source_butler.datastore.trustGetRequest = True 

2113 

2114 self.assertButlerTransfers(purge=True) 

2115 

2116 def testTransferMissingDisassembly(self): 

2117 """Test transfers where datastore records are missing. 

2118 

2119 This is how execution butler works. 

2120 """ 

2121 self.create_butlers( 

2122 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2123 "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID", 

2124 ) 

2125 

2126 # Configure the source butler to allow trust. 

2127 self.source_butler.datastore.trustGetRequest = True 

2128 

2129 # Test disassembly. 

2130 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2131 

2132 def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"): 

2133 """Test that a run can be transferred to another butler.""" 

2134 

2135 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2136 datasetTypeName = "random_data" 

2137 

2138 # Test will create 3 collections and we will want to transfer 

2139 # two of those three. 

2140 runs = ["run1", "run2", "other"] 

2141 

2142 # Also want to use two different dataset types to ensure that 

2143 # grouping works. 

2144 datasetTypeNames = ["random_data", "random_data_2"] 

2145 

2146 # Create the run collections in the source butler. 

2147 for run in runs: 

2148 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2149 

2150 # Create dimensions in source butler. 

2151 n_exposures = 30 

2152 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2153 self.source_butler.registry.insertDimensionData( 

2154 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2155 ) 

2156 self.source_butler.registry.insertDimensionData( 

2157 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2158 ) 

2159 

2160 for i in range(n_exposures): 

2161 self.source_butler.registry.insertDimensionData( 

2162 "exposure", 

2163 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2164 ) 

2165 

2166 # Create dataset types in the source butler. 

2167 dimensions = self.source_butler.registry.dimensions.extract(["instrument", "exposure"]) 

2168 for datasetTypeName in datasetTypeNames: 

2169 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2170 self.source_butler.registry.registerDatasetType(datasetType) 

2171 

2172 # Write a dataset to an unrelated run -- this will ensure that 

2173 # we are rewriting integer dataset ids in the target if necessary. 

2174 # Will not be relevant for UUID. 

2175 run = "distraction" 

2176 butler = Butler(butler=self.source_butler, run=run) 

2177 butler.put( 

2178 makeExampleMetrics(), 

2179 datasetTypeName, 

2180 exposure=1, 

2181 instrument="DummyCamComp", 

2182 physical_filter="d-r", 

2183 ) 

2184 

2185 # Write some example metrics to the source 

2186 butler = Butler(butler=self.source_butler) 

2187 

2188 # Set of DatasetRefs that should be in the list of refs to transfer 

2189 # but which will not be transferred. 

2190 deleted = set() 

2191 

2192 n_expected = 20 # Number of datasets expected to be transferred 

2193 source_refs = [] 

2194 for i in range(n_exposures): 

2195 # Put a third of datasets into each collection, only retain 

2196 # two thirds. 

2197 index = i % 3 

2198 run = runs[index] 

2199 datasetTypeName = datasetTypeNames[i % 2] 

2200 

2201 metric_data = { 

2202 "summary": {"counter": i}, 

2203 "output": {"text": "metric"}, 

2204 "data": [2 * x for x in range(i)], 

2205 } 

2206 metric = MetricsExample(**metric_data) 

2207 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2208 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2209 

2210 # Remove the datastore record using low-level API 

2211 if purge: 

2212 # Remove records for a fraction. 

2213 if index == 1: 

2214 

2215 # For one of these delete the file as well. 

2216 # This allows the "missing" code to filter the 

2217 # file out. 

2218 if not deleted: 

2219 primary, uris = butler.datastore.getURIs(ref) 

2220 if primary: 

2221 primary.remove() 

2222 for uri in uris.values(): 

2223 uri.remove() 

2224 n_expected -= 1 

2225 deleted.add(ref) 

2226 

2227 # Remove the datastore record. 

2228 butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

2229 

2230 if index < 2: 

2231 source_refs.append(ref) 

2232 if ref not in deleted: 

2233 new_metric = butler.get(ref.unresolved(), collections=run) 

2234 self.assertEqual(new_metric, metric) 

2235 

2236 # Create some bad dataset types to ensure we check for inconsistent 

2237 # definitions. 

2238 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2239 for datasetTypeName in datasetTypeNames: 

2240 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2241 self.target_butler.registry.registerDatasetType(datasetType) 

2242 with self.assertRaises(ConflictingDefinitionError) as cm: 

2243 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2244 self.assertIn("dataset type differs", str(cm.exception)) 

2245 

2246 # And remove the bad definitions. 

2247 for datasetTypeName in datasetTypeNames: 

2248 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2249 

2250 # Transfer without creating dataset types should fail. 

2251 with self.assertRaises(KeyError): 

2252 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2253 

2254 # Transfer without creating dimensions should fail. 

2255 with self.assertRaises(ConflictingDefinitionError) as cm: 

2256 self.target_butler.transfer_from( 

2257 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

2258 ) 

2259 self.assertIn("dimension", str(cm.exception)) 

2260 

2261 # The failed transfer above leaves registry in an inconsistent 

2262 # state because the run is created but then rolled back without 

2263 # the collection cache being cleared. For now force a refresh. 

2264 # Can remove with DM-35498. 

2265 self.target_butler.registry.refresh() 

2266 

2267 # Now transfer them to the second butler, including dimensions. 

2268 with self.assertLogs(level=logging.DEBUG) as cm: 

2269 transferred = self.target_butler.transfer_from( 

2270 self.source_butler, 

2271 source_refs, 

2272 id_gen_map=id_gen_map, 

2273 register_dataset_types=True, 

2274 transfer_dimensions=True, 

2275 ) 

2276 self.assertEqual(len(transferred), n_expected) 

2277 log_output = ";".join(cm.output) 

2278 self.assertIn("found in datastore for chunk", log_output) 

2279 self.assertIn("Creating output run", log_output) 

2280 

2281 # Do the transfer twice to ensure that it will do nothing extra. 

2282 # Only do this if purge=True because it does not work for int 

2283 # dataset_id. 

2284 if purge: 

2285 # This should not need to register dataset types. 

2286 transferred = self.target_butler.transfer_from( 

2287 self.source_butler, source_refs, id_gen_map=id_gen_map 

2288 ) 

2289 self.assertEqual(len(transferred), n_expected) 

2290 

2291 # Also do an explicit low-level transfer to trigger some 

2292 # edge cases. 

2293 with self.assertLogs(level=logging.DEBUG) as cm: 

2294 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2295 log_output = ";".join(cm.output) 

2296 self.assertIn("no file artifacts exist", log_output) 

2297 

2298 with self.assertRaises(TypeError): 

2299 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2300 

2301 with self.assertRaises(ValueError): 

2302 self.target_butler.datastore.transfer_from( 

2303 self.source_butler.datastore, source_refs, transfer="split" 

2304 ) 

2305 

2306 # Now try to get the same refs from the new butler. 

2307 for ref in source_refs: 

2308 if ref not in deleted: 

2309 unresolved_ref = ref.unresolved() 

2310 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2311 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2312 self.assertEqual(new_metric, old_metric) 

2313 

2314 # Now prune run2 collection and create instead a CHAINED collection. 

2315 # This should block the transfer. 

2316 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

2317 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2318 with self.assertRaises(CollectionTypeError): 

2319 # Re-importing the run1 datasets can be problematic if they 

2320 # use integer IDs so filter those out. 

2321 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2322 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 

2323 

2324 

2325if __name__ == "__main__": 

2326 unittest.main()