# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

import gc
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import socket
import string
import tempfile
import time
import unittest
import unittest.mock
from tempfile import gettempdir
from threading import Thread

try:
    import boto3
    import botocore
    from moto import mock_s3
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 cannot be imported."""
        return cls
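    # Tests that need S3 are expected to skip themselves when boto3 is None
    # (e.g. via unittest.skipIf); this no-op decorator just keeps the
    # mock_s3-decorated classes importable without moto installed.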

try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql
except ImportError:
    testing = None


try:
    from cheroot import wsgi
    from wsgidav.wsgidav_app import WsgiDAVApp
except ImportError:
    WsgiDAVApp = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionType,
    Config,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import _is_webdav_endpoint
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImport
from lsst.utils.introspection import get_full_type_name

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
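    """Return an example MetricsExample holding summary, output, and data
    values.
    """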

    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests:
    """Helper class providing a suite of put/get tests to run against
    different butler configurations."""

    root = None
    default_run = "ingésτ😺"

    @staticmethod
    def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None):
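        """Check that each named component can be retrieved via butler.get
        and via a deferred handle, and that the results match the reference
        object.
        """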

        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDirectDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, run, storageClass, datasetTypeName):
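        """Create a butler for the given run, register the given dataset
        type, and insert the instrument and visit dimension records that the
        put/get tests rely on.
        """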

        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass, datasetTypeName):
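        """Run the standard suite of put/get tests for the given storage
        class and dataset type name, returning the butler for further use.
        """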

        # New datasets will be added to a run collection, which is also
        # where we will look when retrieving them.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Create a DatasetRef for put
        refIn = DatasetRef(datasetType, dataId, id=None)
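        # With id=None the ref is unresolved; put() resolves it and assigns
        # the dataset ID.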

        # Put with a preexisting id should fail
        with self.assertRaises(ValueError):
            butler.put(metric, DatasetRef(datasetType, dataId, id=100))

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        for args in ((refIn,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                ref = butler.put(metric, *args, run=this_run)
                self.assertIsInstance(ref, DatasetRef)

                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a datasetRef
                metricOut = butler.getDeferred(ref, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDirectDeferred with a ref
                metricOut = butler.getDirectDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

            # Can the artifacts themselves be retrieved?
            if not butler.datastore.isEphemeral:
                root_uri = ResourcePath(self.root)

                for preserve_path in (True, False):
                    destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                    # Use copy so that we can test that overwrite
                    # protection works (using "auto" for File URIs would
                    # use hard links and subsequent transfer would work
                    # because it knows they are the same file).
                    transferred = butler.retrieveArtifacts(
                        [ref], destination, preserve_path=preserve_path, transfer="copy"
                    )
                    self.assertGreater(len(transferred), 0)
                    artifacts = list(ResourcePath.findFileResources([destination]))
                    self.assertEqual(set(transferred), set(artifacts))

                    for artifact in transferred:
                        path_in_destination = artifact.relative_to(destination)
                        self.assertIsNotNone(path_in_destination)

                        # When the path is not preserved there should not be
                        # any path separators.
                        num_seps = path_in_destination.count("/")
                        if preserve_path:
                            self.assertGreater(num_seps, 0)
                        else:
                            self.assertEqual(num_seps, 0)

                    primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                    n_uris = len(secondary_uris)
                    if primary_uri:
                        n_uris += 1
                    self.assertEqual(
                        len(artifacts),
                        n_uris,
                        "Comparing expected artifacts vs actual:"
                        f" {artifacts} vs {primary_uri} and {secondary_uris}",
                    )

                    if preserve_path:
                        # No need to run these twice
                        with self.assertRaises(ValueError):
                            butler.retrieveArtifacts([ref], destination, transfer="move")

                        with self.assertRaises(FileExistsError):
                            butler.retrieveArtifacts([ref], destination)

                        transferred_again = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, overwrite=True
                        )
                        self.assertEqual(set(transferred_again), set(transferred))

            # Now remove the dataset completely.
            butler.pruneDatasets([ref], purge=True, unstore=True)
            # Lookup with original args should now fail.
            with self.assertRaises(LookupError):
                butler.datasetExists(*args, collections=this_run)
            # getDirect() should now fail.
            with self.assertRaises(FileNotFoundError):
                butler.getDirect(ref)
            # Registry shouldn't be able to find it by dataset_id anymore.
            self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know the run is now
            # empty.
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            summary = butler.getDirect(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a DatasetType that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaises(ValueError):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaises(ValueError):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match
        with self.assertRaises(ValueError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=101))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaises(KeyError):
            butler.get(ref, parameters={"unsupported": True})

        # Check that only the expected collections are present
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Check that we can configure a butler to accept a put even
        # if it already has the dataset in registry.
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Allow the put to succeed
        butler._allow_put_of_predefined_dataset = True
        ref2 = butler.put(metric, refIn)
        self.assertEqual(ref2.id, ref.id)

        # A second put will still fail but with a different exception
        # than before.
        with self.assertRaises(ConflictingDefinitionError):
            butler.put(metric, refIn)

        # Reset the flag to avoid confusion
        butler._allow_put_of_predefined_dataset = False

        # Leave the dataset in place since some downstream tests require
        # something to be present.
        return butler

    def testDeferredCollectionPassing(self):
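        """Test that a butler constructed without a run or collections can
        still put and get datasets when collections are given explicitly.
        """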

        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # A second registration is allowed but indicates a no-op.
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection raises a
        # CollectionError.
        with self.assertRaises(CollectionError):
            butler.datasetExists(datasetType, dataId)
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Deleting the dataset from the new collection should leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.datasetExists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")

    def testConstructor(self):
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self):
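        """Test basic put/get for a simple storage class with no
        components.
        """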

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self):
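        """Test put/get of a composite stored as a single file without
        disassembly.
        """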

        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self):
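        """Test put/get of a composite that the datastore disassembles into
        per-component files.
        """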

        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self):
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.getDirect(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDirectDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDirectDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # A parameter defined on the write storage class should work
        # regardless of the read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self):
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict; this should coerce to a MetricsExample.
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.getDirect(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.getDirect(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.getDirect(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the wrong dataset type definition using get()
        # rather than getDirect(). This should be consistent with getDirect()
        # behavior and return the type of the DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self):
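        """Test ingest of external files, both one file per dataset and
        multiple datasets in a single file.
        """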

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImport("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, id=None)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, id=None))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            butler.ingest(*datasets, transfer="move", record_validation_info=False)
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
        self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

    def testPruneCollections(self):
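        """Test pruning of RUN, TAGGED, and CHAINED collections, with and
        without purging and unstoring the datasets they contain.
        """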

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are in
        # different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Try to delete a RUN collection without purge, or with purge and not
        # unstore.
        with self.assertRaises(TypeError):
            butler.pruneCollection(run1)
        with self.assertRaises(TypeError):
            butler.pruneCollection(run2, purge=True)
        # Add a TAGGED collection and associate ref3 only into it.
        tag1 = "tag1"
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertTrue(registered)
        # Registering a second time should be allowed.
        registered = butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        self.assertFalse(registered)
        butler.registry.associate(tag1, [ref3])
        # Add a CHAINED collection that searches run1 and then run2. It
        # logically contains only ref1, because ref2 is shadowed due to them
        # having the same data ID and dataset type.
        chain1 = "chain1"
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        # Try to delete RUN collections, which should fail with complete
        # rollback because they're still referenced by the CHAINED
        # collection.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            butler.pruneCollection(run2, purge=True, unstore=True)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Try to delete CHAINED and TAGGED collections with purge; should not
        # work.
        with self.assertRaises(TypeError):
            butler.pruneCollection(tag1, purge=True, unstore=True)
        with self.assertRaises(TypeError):
            butler.pruneCollection(chain1, purge=True, unstore=True)
        # Remove the tagged collection with unstore=False. This should not
        # affect the datasets.
        butler.pruneCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertTrue(existence[ref3])
        # Add the tagged collection back in, and remove it with unstore=True.
        # This should remove ref3 only from the datastore.
        butler.registry.registerCollection(tag1, type=CollectionType.TAGGED)
        butler.registry.associate(tag1, [ref3])
        butler.pruneCollection(tag1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(tag1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Delete the chain with unstore=False. The datasets should not be
        # affected at all.
        butler.pruneCollection(chain1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        existence = butler.datastore.knows_these([ref1, ref2, ref3])
        self.assertTrue(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Redefine and then delete the chain with unstore=True. Only ref1
        # should be unstored (ref3 has already been unstored, but otherwise
        # would be now).
        butler.registry.registerCollection(chain1, type=CollectionType.CHAINED)
        butler.registry.setCollectionChain(chain1, [run1, run2])
        butler.pruneCollection(chain1, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(chain1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref1, ref2, ref3])
        existence = butler.datastore.mexists([ref1, ref2, ref3])
        self.assertFalse(existence[ref1])
        self.assertTrue(existence[ref2])
        self.assertFalse(existence[ref3])
        # Remove run1. This removes ref1 and ref3 from the registry (they're
        # already gone from the datastore, which is fine).
        butler.pruneCollection(run1, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [ref2])
        self.assertTrue(butler.datastore.exists(ref2))
        self.assertTrue(butler.datastore.knows(ref2))
        # Remove run2. This removes ref2 from the registry and the datastore.
        butler.pruneCollection(run2, purge=True, unstore=True)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertCountEqual(set(butler.registry.queryDatasets(..., collections=...)), [])

        # Now that the collections have been pruned we can remove the
        # dataset type.
        butler.registry.removeDatasetType(datasetType.name)

    def testPickle(self):
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self):
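        """Test querying of registered dataset types, including component
        expansion, and validation of the configuration against them.
        """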

        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries = [
            (
                "instrument",
                {"instrument": "DummyCam"},
                {"instrument": "DummyHSC"},
                {"instrument": "DummyCamComp"},
            ),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for args in dimensionEntries:
            butler.registry.insertDimensionData(*args)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but querying can still return the
        # components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self):
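        """Test that a failed transaction rolls back registry inserts and
        datastore writes together.
        """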

        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test getDirect
                metricOut = butler.getDirect(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly if Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.getDirect(ref)

    def testMakeRepo(self):
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root
        if self.fullConfigKey is None:
            return

        # Create two separate directories
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" lacks a key that it would only get by
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with relocatable Butler repo
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

    def testStringification(self):
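        """Test that the butler stringification includes the expected
        datastore and registry information.
        """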

        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler.datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)

    def testButlerRewriteDataId(self):
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # and check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root, relpath):
        """Check if a file exists at a given path relative to root.

        The testPutTemplates test uses this to verify the physical
        existence of files at the requested locations.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()

    def testPutTemplates(self):
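        """Test that file templates control the output paths of datasets and
        that templates unable to produce unique filenames are rejected.
        """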

1242 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1243 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1244 

1245 # Add needed Dimensions 

1246 butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

1247 butler.registry.insertDimensionData( 

1248 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

1249 ) 

1250 butler.registry.insertDimensionData( 

1251 "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"} 

1252 ) 

1253 butler.registry.insertDimensionData( 

1254 "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"} 

1255 ) 

1256 

1257 # Create and store a dataset 

1258 metric = makeExampleMetrics() 

1259 

1260 # Create two almost-identical DatasetTypes (both will use default 

1261 # template) 

1262 dimensions = butler.registry.dimensions.extract(["instrument", "visit"]) 

1263 butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass)) 

1264 butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass)) 

1265 butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass)) 

1266 

1267 dataId1 = {"instrument": "DummyCamComp", "visit": 423} 

1268 dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"} 

1269 

1270 # Put with exactly the data ID keys needed 

1271 ref = butler.put(metric, "metric1", dataId1) 

1272 uri = butler.getURI(ref) 

1273 self.assertTrue(uri.exists()) 

1274 self.assertTrue( 

1275 uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle") 

1276 ) 

1277 

1278 # Check the template based on dimensions 

1279 if hasattr(butler.datastore, "templates"): 

1280 butler.datastore.templates.validateTemplates([ref]) 

1281 

1282 # Put with extra data ID keys (physical_filter is an optional 

1283 # dependency); should not change template (at least the way we're 

1284 # defining them to behave now; the important thing is that they 

1285 # must be consistent). 

1286 ref = butler.put(metric, "metric2", dataId2) 

1287 uri = butler.getURI(ref) 

1288 self.assertTrue(uri.exists()) 

1289 self.assertTrue( 

1290 uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle") 

1291 ) 

1292 

1293 # Check the template based on dimensions 

1294 if hasattr(butler.datastore, "templates"): 

1295 butler.datastore.templates.validateTemplates([ref]) 

1296 

1297 # Use a template that has a typo in dimension record metadata. 

1298 # Easier to test with a butler that has a ref with records attached. 

1299 template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits") 

1300 with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"): 

1301 path = template.format(ref) 

1302 self.assertEqual(path, f"a/v423/{ref.id}_fits") 

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames.
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self):
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass):
        """Export to a temp directory and import back into a new temp
        directory repo. Does not assume a POSIX datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
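        # The skymap hash column just stores bytes, so any fixed 8-byte
        # value should be good enough for this test.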

        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import the datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it
                # shouldn't complain about being asked to export the "htm7"
                # elements even though there aren't any in these datasets
                # or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler.
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of
                # the butler command-line interface "import" subcommand.
                # Functions in the script folder are generally considered
                # protected and should not be used as public API.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                        reuse_ids=False,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType
                        # and data ID separately, to avoid lookup by
                        # dataset_id.
                        self.assertTrue(importButler.datasetExists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put a dataset in each.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        uri1 = butler.getURI(ref1, collections=[run1])
        uri2 = butler.getURI(ref2, collections=[run2])
        # Remove from both runs, with different values for unstore.
        butler.removeRuns([run1], unstore=True)
        butler.removeRuns([run2], unstore=False)
        # There should be nothing in the registry for either one, and the
        # datastore should not think either exists.
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run1)
        with self.assertRaises(MissingCollectionError):
            butler.registry.getCollectionType(run2)
        self.assertFalse(butler.datastore.exists(ref1))
        self.assertFalse(butler.datastore.exists(ref2))
        # The ref we unstored should be gone according to the URI, but the
        # one we forgot should still be around.
        self.assertFalse(uri1.exists())
        self.assertTrue(uri2.exists())


class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "/gen3.sqlite3"

    def testPathConstructor(self):
        """Independent test of constructor using PathLike."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object with the butler yaml.
        path = pathlib.Path(self.tmpConfigFile)
        butler = Butler(path, writeable=False)
        self.assertIsInstance(butler, Butler)

        # And again with a Path object without the butler yaml
        # (making sure we skip it if the tmp config doesn't end
        # in butler.yaml -- which is the case for a subclass).
        if self.tmpConfigFile.endswith("butler.yaml"):
            path = pathlib.Path(os.path.dirname(self.tmpConfigFile))
            butler = Butler(path, writeable=False)
            self.assertIsInstance(butler, Butler)

    def testExportTransferCopy(self):
        """Test local export using all transfer modes."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        exportButler = self.runPutGetTest(storageClass, "test_metric")
        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        uris = [exportButler.getURI(d) for d in datasets]
        datastoreRoot = exportButler.datastore.root

        pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris]

        for path in pathsInStore:
            # Assume a local file system.
            self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}")

        for transfer in ("copy", "link", "symlink", "relsymlink"):
            with safeTestTempDir(TESTDIR) as exportDir:
                with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export:
                    export.saveDatasets(datasets)
                for path in pathsInStore:
                    self.assertTrue(
                        self.checkFileExists(exportDir, path),
                        f"Check that mode {transfer} exported files",
                    )

    def testPruneDatasets(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Load registry data with dimensions to hang datasets off of.
        registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry"))
        butler.import_(filename=os.path.join(registryDataDir, "base.yaml"))
        # Add some RUN-type collections.
        run1 = "run1"
        butler.registry.registerRun(run1)
        run2 = "run2"
        butler.registry.registerRun(run2)
        # Put some datasets. ref1 and ref2 have the same data ID, and are
        # in different runs. ref3 has a different data ID.
        metric = makeExampleMetrics()
        dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"])
        datasetType = self.addDatasetType(
            "prune_collections_test_dataset", dimensions, storageClass, butler.registry
        )
        ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1)
        ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2)
        ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1)

        # Simple prune.
        butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)
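        # With purge=True the registry entries themselves are removed, so
        # even an existence check by dataset type and data ID should now
        # raise rather than return False.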

        with self.assertRaises(LookupError):
            butler.datasetExists(ref1.datasetType, ref1.dataId, collections=run1)

        # Put the data back.
        ref1 = butler.put(metric, ref1.unresolved(), run=run1)
        ref2 = butler.put(metric, ref2.unresolved(), run=run2)
        ref3 = butler.put(metric, ref3.unresolved(), run=run1)

        # Check that in normal mode, deleting the record will lead to
        # the trash not touching the file.
        uri1 = butler.datastore.getURI(ref1)
        butler.datastore.bridge.moveToTrash([ref1], transaction=None)  # Update the dataset_location table
        butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref1.id})
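        # With the datastore record already deleted, the trash/emptyTrash
        # calls below should find nothing to act on, so the file artifact
        # itself is expected to survive.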

        butler.datastore.trash(ref1)
        butler.datastore.emptyTrash()
        self.assertTrue(uri1.exists())
        uri1.remove()  # Clean it up.

        # Simulate execution butler setup by deleting the datastore
        # record but keeping the file around and trusting.
        butler.datastore.trustGetRequest = True
        uri2 = butler.datastore.getURI(ref2)
        uri3 = butler.datastore.getURI(ref3)
        self.assertTrue(uri2.exists())
        self.assertTrue(uri3.exists())

        # Remove the datastore record.
        butler.datastore.bridge.moveToTrash([ref2], transaction=None)  # Update the dataset_location table
        butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref2.id})
        self.assertTrue(uri2.exists())
        butler.datastore.trash([ref2, ref3])
        # Immediate removal for the ref2 file...
        self.assertFalse(uri2.exists())
        # ...but ref3 has to wait for the emptyTrash call.
        self.assertTrue(uri3.exists())
        butler.datastore.emptyTrash()
        self.assertFalse(uri3.exists())

        # Clear out the datasets from the registry.
        butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True)

    def testPytypeCoercion(self):
        """Test python type coercion on Butler.get and put."""

        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler = self.runPutGetTest(storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}
        metric = butler.get(datasetTypeName, dataId=dataId)
        self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample")

        datasetType_ori = butler.registry.getDatasetType(datasetTypeName)
        self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents")

        # Now we need to hack the registry dataset type definition.
        # There is no API for this.
        manager = butler.registry._managers.datasets
        manager._db.update(
            manager._static.dataset_type,
            {"name": datasetTypeName},
            {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"},
        )

        # Force a reset of the dataset type cache.
        butler.registry.refresh()

        datasetType_new = butler.registry.getDatasetType(datasetTypeName)
        self.assertEqual(datasetType_new.name, datasetType_ori.name)
        self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel")

        metric_model = butler.get(datasetTypeName, dataId=dataId)
        self.assertNotEqual(type(metric_model), type(metric))
        self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel")

        # Put the model and read it back to show that everything now
        # works as normal.
        metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424)
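        # Keyword arguments are folded into the data ID, so visit=424
        # overrides the visit=423 in dataId and writes a second dataset.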

        metric_model_new = butler.get(metric_ref)
        self.assertEqual(metric_model_new, metric_model)

        # Hack the storage class again to something that will fail on the
        # get with no conversion class.
        manager._db.update(
            manager._static.dataset_type,
            {"name": datasetTypeName},
            {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"},
        )
        butler.registry.refresh()

        with self.assertRaises(ValueError):
            butler.get(datasetTypeName, dataId=dataId)


@unittest.skipUnless(testing is not None, "testing.postgresql module not found")
class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization of a butler using Postgres."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    fullConfigKey = ".datastore.formatters"
    validationCanFail = True
    datastoreStr = ["/tmp"]
    datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"]
    registryStr = "PostgreSQL@test"

    @staticmethod
    def _handler(postgresql):
        engine = sqlalchemy.engine.create_engine(postgresql.url())
        with engine.begin() as connection:
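            # The btree_gist extension is presumably required by the
            # registry's PostgreSQL schema (e.g. for exclusion constraints
            # involving timespans), so create it before any tables exist.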

            connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;"))

    @classmethod
    def setUpClass(cls):
        # Create the postgres test server.
        cls.postgresql = testing.postgresql.PostgresqlFactory(
            cache_initialized_db=True, on_initialized=cls._handler
        )
        super().setUpClass()

    @classmethod
    def tearDownClass(cls):
        # Clean up any lingering SQLAlchemy engines/connections
        # so they're closed before we shut down the server.
        gc.collect()
        cls.postgresql.clear_cache()
        super().tearDownClass()

    def setUp(self):
        self.server = self.postgresql()

        # Need to add a registry section to the config.
        self._temp_config = False
        config = Config(self.configFile)
        config["registry", "db"] = self.server.url()
        with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh:
            config.dump(fh)
        self.configFile = fh.name
        self._temp_config = True
        super().setUp()

    def tearDown(self):
        self.server.stop()
        if self._temp_config and os.path.exists(self.configFile):
            os.remove(self.configFile)
        super().tearDown()

    def testMakeRepo(self):
        # The base class test assumes that it's using sqlite and that the
        # config file is acceptable to sqlite.
        raise unittest.SkipTest("Postgres config is not compatible with this test.")


class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase):
    """InMemoryDatastore specialization of a butler."""

    configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml")
    fullConfigKey = None
    useTempRoot = False
    validationCanFail = False
    datastoreStr = ["datastore='InMemory"]
    datastoreName = ["InMemoryDatastore@"]
    registryStr = "/gen3.sqlite3"

    def testIngest(self):
        pass


class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """PosixDatastore specialization using a chained datastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")
    fullConfigKey = ".datastore.datastores.1.formatters"
    validationCanFail = True
    datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"]
    datastoreName = [
        "InMemoryDatastore@",
        f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1",
        "SecondDatastore",
    ]
    registryStr = "/gen3.sqlite3"


class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase):
    """Test that a yaml file in one location can refer to a root in another."""

    datastoreStr = ["dir1"]
    # Disable the makeRepo test since we are deliberately not using
    # butler.yaml as the config name.
    fullConfigKey = None

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

        # Make a new repository in one place.
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Move the yaml file to a different place and add a "root".
        self.dir2 = os.path.join(self.root, "dir2")
        os.makedirs(self.dir2, exist_ok=True)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        config = Config(configFile1)
        config["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        config.dumpToUri(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2

    def testFileLocations(self):
        self.assertNotEqual(self.dir1, self.dir2)
        self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml")))
        self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml")))
        self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3")))


class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase):
    """Test that a config file created by makeRepo outside of the repo works."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = os.path.join(self.root2, "different.yaml")
        Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)

    def tearDown(self):
        if os.path.exists(self.root2):
            shutil.rmtree(self.root2, ignore_errors=True)
        super().tearDown()

    def testConfigExistence(self):
        c = Config(self.tmpConfigFile)
        uri_config = ResourcePath(c["root"])
        uri_expected = ResourcePath(self.root, forceDirectory=True)
        self.assertEqual(uri_config.geturl(), uri_expected.geturl())
        self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path")

    def testPutGet(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of the repo works
    when outfile is a directory.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = self.root2
        Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)

    def testConfigExistence(self):
        # Append the yaml file, else the Config constructor does not know
        # the file type.
        self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml")
        super().testConfigExistence()


class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase):
    """Test that a config file created by makeRepo outside of the repo works
    when outfile is a URI.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.root2 = makeTestTempDir(TESTDIR)

        self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl()
        Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile)


@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!")
class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """S3Datastore specialization of a butler: an S3 storage datastore plus
    a local in-memory SqlRegistry.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml")
    fullConfigKey = None
    validationCanFail = True

    bucketName = "anybucketname"
    """Name of the bucket that will be used in the tests. The name is read
    from the config file used with the tests during set-up.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used when useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["FileDatastore@s3://{bucketName}/{root}"]
    """The expected format of the S3 datastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the registry string."""

    mock_s3 = mock_s3()
    """The mocked S3 interface from moto."""

    def genRoot(self):
        """Return a random 20-character string to serve as a root name for
        the temporary bucket repo.

        This is equivalent to tempfile.mkdtemp, as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20))
        return rndstr + "/"

    def setUp(self):
        config = Config(self.configFile)
        uri = ResourcePath(config[".datastore.datastore.root"])
        self.bucketName = uri.netloc

        # Enable S3 mocking of tests.
        self.mock_s3.start()

        # Set up some fake credentials if they do not exist.
        self.usingDummyCredentials = setAwsEnvCredentials()

        if self.useTempRoot:
            self.root = self.genRoot()
        rooturi = f"s3://{self.bucketName}/{self.root}"
        config.update({"datastore": {"datastore": {"root": rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        # Moto needs to know that we expect the bucket to exist
        # (this used to be the class attribute bucketName).
        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=self.bucketName)

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{rooturi}"]
        Butler.makeRepo(rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml")
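        # posixpath.join (rather than os.path.join) is used above so that
        # the URI keeps forward slashes on every platform.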

    def tearDown(self):
        s3 = boto3.resource("s3")
        bucket = s3.Bucket(self.bucketName)
        try:
            bucket.objects.all().delete()
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The key was not reachable; nothing to clean up.
                pass
            else:
                raise

        bucket = s3.Bucket(self.bucketName)
        bucket.delete()

        # Stop the S3 mock.
        self.mock_s3.stop()

        # Unset any potentially set dummy credentials.
        if self.usingDummyCredentials:
            unsetAwsEnvCredentials()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

        if self.useTempRoot and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

        super().tearDown()


@unittest.skipIf(WsgiDAVApp is None, "Warning: wsgidav/cheroot not found!")
class WebdavDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):
    """WebdavDatastore specialization of a butler: a WebDAV storage datastore
    plus a local in-memory SqlRegistry.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler-webdavstore.yaml")
    fullConfigKey = None
    validationCanFail = True

    serverName = "localhost"
    """Name of the server that will be used in the tests."""

    portNumber = 8080
    """Port on which the webdav server listens. Automatically chosen
    at setUpClass via the _getfreeport() method.
    """

    root = "butlerRoot/"
    """Root repository directory expected to be used when useTempRoot=False.
    Otherwise the root is set to a randomly generated 20-character string
    during set-up.
    """

    datastoreStr = [f"datastore={root}"]
    """Contains all expected root locations in a format expected to be
    returned by Butler stringification.
    """

    datastoreName = ["FileDatastore@https://{serverName}/{root}"]
    """The expected format of the WebdavDatastore string."""

    registryStr = "/gen3.sqlite3"
    """Expected format of the registry string."""

    serverThread = None
    """Thread in which the local webdav server will run."""

    stopWebdavServer = False
    """This flag will cause the webdav server to gracefully shut down
    when set to True.
    """

    def genRoot(self):
        """Return a random 20-character string to serve as a root name for
        the temporary repo.

        This is equivalent to tempfile.mkdtemp, as this is what self.root
        becomes when useTempRoot is True.
        """
        rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20))
        return rndstr + "/"

    @classmethod
    def setUpClass(cls):
        # Do the same as the inherited class.
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

        cls.portNumber = cls._getfreeport()
        # Run a local webdav server on which tests will be run.
        cls.serverThread = Thread(
            target=cls._serveWebdav, args=(cls, cls.portNumber, lambda: cls.stopWebdavServer), daemon=True
        )
        cls.serverThread.start()
        # Wait for it to start.
        time.sleep(3)
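        # A fixed sleep keeps this simple; polling the port until it
        # accepts connections would be more robust if this proves flaky.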

    @classmethod
    def tearDownClass(cls):
        # Ask for a graceful shutdown of the webdav server.
        cls.stopWebdavServer = True
        # Wait for the thread to exit.
        cls.serverThread.join()
        super().tearDownClass()

    def setUp(self):
        config = Config(self.configFile)

        if self.useTempRoot:
            self.root = self.genRoot()
        self.rooturi = f"http://{self.serverName}:{self.portNumber}/{self.root}"
        config.update({"datastore": {"datastore": {"root": self.rooturi}}})

        # Need a local folder to store the registry database.
        self.reg_dir = makeTestTempDir(TESTDIR)
        config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3"

        self.datastoreStr = f"datastore={self.root}"
        self.datastoreName = [f"FileDatastore@{self.rooturi}"]

        if not _is_webdav_endpoint(self.rooturi):
            raise OSError("Webdav server not running properly: cannot run tests.")

        Butler.makeRepo(self.rooturi, config=config, forceConfigRoot=False)
        self.tmpConfigFile = posixpath.join(self.rooturi, "butler.yaml")

    def tearDown(self):
        # Clear the temporary directory.
        ResourcePath(self.rooturi).remove()
        ResourcePath(self.rooturi).session.close()

        if self.reg_dir is not None and os.path.exists(self.reg_dir):
            shutil.rmtree(self.reg_dir, ignore_errors=True)

        if self.useTempRoot and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

        super().tearDown()

    def _serveWebdav(self, port: int, stopWebdavServer):
        """Start a local webdav-compatible HTTP server listening on
        http://localhost:port.

        This server only runs while this test class is instantiated,
        and then shuts down. It must be started in a separate thread.

        Parameters
        ----------
        port : `int`
            The port number on which the server should listen.
        """
        root_path = gettempdir()

        config = {
            "host": "0.0.0.0",
            "port": port,
            "provider_mapping": {"/": root_path},
            "http_authenticator": {"domain_controller": None},
            "simple_dc": {"user_mapping": {"*": True}},
            "verbose": 0,
        }
        app = WsgiDAVApp(config)

        server_args = {
            "bind_addr": (config["host"], config["port"]),
            "wsgi_app": app,
        }
        server = wsgi.Server(**server_args)
        server.prepare()

        try:
            # Start the actual server in a separate thread.
            t = Thread(target=server.serve, daemon=True)
            t.start()
            # Watch stopWebdavServer, and gracefully shut down
            # the server when it becomes True.
            while True:
                if stopWebdavServer():
                    break
                time.sleep(1)
        except KeyboardInterrupt:
            print("Caught Ctrl-C, shutting down...")
        finally:
            server.stop()
            t.join()

    @staticmethod
    def _getfreeport():
        """Determine a free port using sockets."""
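        # Note: the port is only known to be free at the moment of this
        # check; there is an unavoidable race between closing the socket
        # and the server binding the port, which is acceptable in tests.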

        free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        free_socket.bind(("127.0.0.1", 0))
        free_socket.listen()
        port = free_socket.getsockname()[1]
        free_socket.close()
        return port


class PosixDatastoreTransfers(unittest.TestCase):
    """Test data transfers between butlers.

    Tests for different managers. UUID to UUID and integer to integer are
    tested. UUID to integer is not supported since we do not currently
    want to allow that. Integer to UUID is supported, with the caveat
    that a UUID4 will be generated, which will be incorrect for raw
    dataset types. The test ignores that.
    """

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)
        self.config = Config(self.configFile)

    def tearDown(self):
        removeTestTempDir(self.root)

    def create_butler(self, manager, label):
        config = Config(self.configFile)
        config["registry", "managers", "datasets"] = manager
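        # The manager class chosen here determines whether dataset IDs are
        # integers or UUIDs, which is the difference these tests exercise.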

        return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True)

    def create_butlers(self, manager1, manager2):
        self.source_butler = self.create_butler(manager1, "1")
        self.target_butler = self.create_butler(manager2, "2")

    def testTransferUuidToUuid(self):
        self.create_butlers(
            "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
            "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
        )
        # Setting id_gen_map should have no effect here.
        self.assertButlerTransfers(id_gen_map={"random_data_2": DatasetIdGenEnum.DATAID_TYPE})

    def testTransferMissing(self):
        """Test transfers where datastore records are missing.

        This is how execution butler works.
        """
        self.create_butlers(
            "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
            "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
        )

        # Configure the source butler to allow trust.
        self.source_butler.datastore.trustGetRequest = True

        self.assertButlerTransfers(purge=True)

    def testTransferMissingDisassembly(self):
        """Test transfers where datastore records are missing and the
        composite dataset was disassembled.

        This is how execution butler works.
        """
        self.create_butlers(
            "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
            "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID",
        )

        # Configure the source butler to allow trust.
        self.source_butler.datastore.trustGetRequest = True

        # Test disassembly.
        self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite")

    def assertButlerTransfers(self, id_gen_map=None, purge=False, storageClassName="StructuredData"):
        """Test that a run can be transferred to another butler."""

        storageClass = self.storageClassFactory.getStorageClass(storageClassName)
        datasetTypeName = "random_data"

        # The test will create 3 collections and we will want to transfer
        # two of those three.
        runs = ["run1", "run2", "other"]

        # We also want to use two different dataset types to ensure that
        # grouping works.
        datasetTypeNames = ["random_data", "random_data_2"]

        # Create the run collections in the source butler.
        for run in runs:
            self.source_butler.registry.registerCollection(run, CollectionType.RUN)

        # Create dimensions in the source butler.
        n_exposures = 30
        self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        self.source_butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        self.source_butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        for i in range(n_exposures):
            self.source_butler.registry.insertDimensionData(
                "exposure",
                {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"},
            )

        # Create dataset types in the source butler.
        dimensions = self.source_butler.registry.dimensions.extract(["instrument", "exposure"])
        for datasetTypeName in datasetTypeNames:
            datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
            self.source_butler.registry.registerDatasetType(datasetType)

        # Write a dataset to an unrelated run -- this will ensure that
        # we are rewriting integer dataset IDs in the target if necessary.
        # Not relevant for UUID.
        run = "distraction"
        butler = Butler(butler=self.source_butler, run=run)
        butler.put(
            makeExampleMetrics(),
            datasetTypeName,
            exposure=1,
            instrument="DummyCamComp",
            physical_filter="d-r",
        )

        # Write some example metrics to the source.
        butler = Butler(butler=self.source_butler)

        # Set of DatasetRefs that should be in the list of refs to transfer
        # but which will not be transferred.
        deleted = set()

        n_expected = 20  # Number of datasets expected to be transferred.
        source_refs = []
        for i in range(n_exposures):
            # Put a third of the datasets into each collection; only
            # two thirds will be retained.
            index = i % 3
            run = runs[index]
            datasetTypeName = datasetTypeNames[i % 2]

            metric_data = {
                "summary": {"counter": i},
                "output": {"text": "metric"},
                "data": [2 * x for x in range(i)],
            }
            metric = MetricsExample(**metric_data)
            dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run)

            # Remove the datastore record using the low-level API.
            if purge:
                # Remove records for a fraction of the datasets.
                if index == 1:

                    # For one of these, delete the file as well.
                    # This allows the "missing" code to filter the
                    # file out.
                    if not deleted:
                        primary, uris = butler.datastore.getURIs(ref)
                        if primary:
                            primary.remove()
                        for uri in uris.values():
                            uri.remove()
                        n_expected -= 1
                        deleted.add(ref)

                    # Remove the datastore record.
                    butler.datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
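                    # With the record gone but (for all but one ref) the
                    # file still present, the source butler can only serve
                    # these refs because trustGetRequest is enabled by the
                    # tests that pass purge=True.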

2214 

2215 if index < 2: 

2216 source_refs.append(ref) 

2217 if ref not in deleted: 

2218 new_metric = butler.get(ref.unresolved(), collections=run) 

2219 self.assertEqual(new_metric, metric) 

2220 

2221 # Create some bad dataset types to ensure we check for inconsistent 

2222 # definitions. 

2223 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2224 for datasetTypeName in datasetTypeNames: 

2225 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2226 self.target_butler.registry.registerDatasetType(datasetType) 

2227 with self.assertRaises(ConflictingDefinitionError) as cm: 

2228 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2229 self.assertIn("dataset type differs", str(cm.exception)) 

2230 

2231 # And remove the bad definitions. 

2232 for datasetTypeName in datasetTypeNames: 

2233 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2234 

2235 # Transfer without creating dataset types should fail. 

2236 with self.assertRaises(KeyError): 

2237 self.target_butler.transfer_from(self.source_butler, source_refs, id_gen_map=id_gen_map) 

2238 

2239 # Transfer without creating dimensions should fail. 

2240 with self.assertRaises(ConflictingDefinitionError) as cm: 

2241 self.target_butler.transfer_from( 

2242 self.source_butler, source_refs, id_gen_map=id_gen_map, register_dataset_types=True 

2243 ) 

2244 self.assertIn("dimension", str(cm.exception)) 

2245 

2246 # The failed transfer above leaves registry in an inconsistent 

2247 # state because the run is created but then rolled back without 

2248 # the collection cache being cleared. For now force a refresh. 

2249 # Can remove with DM-35498. 

2250 self.target_butler.registry.refresh() 

2251 

2252 # Now transfer them to the second butler, including dimensions. 

2253 with self.assertLogs(level=logging.DEBUG) as cm: 

2254 transferred = self.target_butler.transfer_from( 

2255 self.source_butler, 

2256 source_refs, 

2257 id_gen_map=id_gen_map, 

2258 register_dataset_types=True, 

2259 transfer_dimensions=True, 

2260 ) 

2261 self.assertEqual(len(transferred), n_expected) 

2262 log_output = ";".join(cm.output) 

2263 self.assertIn("found in datastore for chunk", log_output) 

2264 self.assertIn("Creating output run", log_output) 

2265 

2266 # Do the transfer twice to ensure that it will do nothing extra. 

2267 # Only do this if purge=True because it does not work for int 

2268 # dataset_id. 

2269 if purge: 

2270 # This should not need to register dataset types. 

2271 transferred = self.target_butler.transfer_from( 

2272 self.source_butler, source_refs, id_gen_map=id_gen_map 

2273 ) 

2274 self.assertEqual(len(transferred), n_expected) 

2275 

2276 # Also do an explicit low-level transfer to trigger some 

2277 # edge cases. 

2278 with self.assertLogs(level=logging.DEBUG) as cm: 

2279 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2280 log_output = ";".join(cm.output) 

2281 self.assertIn("no file artifacts exist", log_output) 

2282 

2283 with self.assertRaises(TypeError): 

2284 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) 

2285 

2286 with self.assertRaises(ValueError): 

2287 self.target_butler.datastore.transfer_from( 

2288 self.source_butler.datastore, source_refs, transfer="split" 

2289 ) 

2290 

2291 # Now try to get the same refs from the new butler. 

2292 for ref in source_refs: 

2293 if ref not in deleted: 

2294 unresolved_ref = ref.unresolved() 

2295 new_metric = self.target_butler.get(unresolved_ref, collections=ref.run) 

2296 old_metric = self.source_butler.get(unresolved_ref, collections=ref.run) 

2297 self.assertEqual(new_metric, old_metric) 

2298 

2299 # Now prune run2 collection and create instead a CHAINED collection. 

2300 # This should block the transfer. 

2301 self.target_butler.pruneCollection("run2", purge=True, unstore=True) 

2302 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2303 with self.assertRaises(CollectionTypeError): 

2304 # Re-importing the run1 datasets can be problematic if they 

2305 # use integer IDs so filter those out. 

2306 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2307 self.target_butler.transfer_from(self.source_butler, to_transfer, id_gen_map=id_gen_map) 

2308 

2309 

2310if __name__ == "__main__": 2310 ↛ 2311line 2310 didn't jump to line 2311, because the condition on line 2310 was never true

2311 unittest.main()