Coverage for tests/test_butler.py: 12% (1232 statements)
coverage.py v7.2.7, created at 2023-06-08 05:05 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""
from __future__ import annotations

import gc
import json
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock
import uuid
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, cast

try:
    import boto3
    import botocore
    from moto import mock_s3  # type: ignore[import]
except ImportError:
    boto3 = None

    def mock_s3(cls):
        """A no-op decorator in case moto mock_s3 cannot be imported."""
        return cls


try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetExistence,
    DatasetRef,
    DatasetType,
    FileDataset,
    FileTemplate,
    FileTemplateValidationError,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    from lsst.daf.butler import Datastore, DimensionGraph, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))


def makeExampleMetrics():
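    """Return an example MetricsExample with summary, output, and data
    values filled in for use as test data.
    """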

    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )


class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent the misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class ButlerConfigTests(unittest.TestCase):
    """Simple tests for ButlerConfig that are not covered by any other test
    cases."""

    def testSearchPath(self):
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")


class ButlerPutGetTests(TestCaseMixin):
    """Helper methods for running a suite of put/get tests from different
    butler configurations."""

    root: str
    default_run = "ingésτ😺"
    storageClassFactory: StorageClassFactory
    configFile: str
    tmpConfigFile: str

    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGraph, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)

    def assertGetComponents(self, butler, datasetRef, components, reference, collections=None) -> None:
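        """Check that each named component of the dataset can be retrieved
        both with a direct get() and through a deferred handle, and that the
        results match the corresponding attribute of the reference object.
        """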

        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[Butler, DatasetType]:
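        """Create a Butler for the given run and register a dataset type
        with instrument+visit dimensions, inserting the dimension records
        (instrument, physical_filter, visit_system, and visits 423-425)
        used by the put/get tests.
        """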

        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, set([run]))

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                    "visit_system": 1,
                },
            )
        return butler, datasetType

    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Butler:
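        """Exercise put/get round trips, component retrieval, artifact
        retrieval, and dataset removal for the given storage class, and
        return the Butler so callers can run further checks.
        """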

        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType

        # Keep track of any collections we add and do not clean up
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]
        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                kwargs: dict[str, Any] = {}
                if not isinstance(args[0], DatasetRef):  # type: ignore
                    kwargs["run"] = this_run
                ref = butler.put(metric, *args, **kwargs)
                self.assertIsInstance(ref, DatasetRef)

                # Test get via the resolved DatasetRef (the old getDirect path)
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a datasetRef
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a ref
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                # Can the artifacts themselves be retrieved?
                if not butler.datastore.isEphemeral:
                    root_uri = ResourcePath(self.root)

                    for preserve_path in (True, False):
                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for File URIs would
                        # use hard links and subsequent transfer would work
                        # because it knows they are the same file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)
                            assert path_in_destination is not None

                            # When the path is not preserved there should not
                            # be any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.datastore.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))

                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with original args should now fail.
                self.assertFalse(butler.exists(*args, collections=this_run))
                # get() should still fail.
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

            # Do explicit registry removal since we know they are
            # empty
            butler.registry.removeCollection(this_run)
            expected_collections.remove(this_run)

        # Create DatasetRef for put using default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Put the dataset again, since the last thing we did was remove it
        # and we want to use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters
        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)

        # Create a DatasetType that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails
        with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises(FileNotFoundError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have the expected collections
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed
        butler.pruneDatasets([ref], unstore=True, purge=True)

        # Add the same ref again, so we can check that duplicate put fails.
        ref = butler.put(metric, datasetType, dataId)

        # Repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # In case of resolved ref this write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present

        return butler

    def testDeferredCollectionPassing(self) -> None:
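        """Test that a Butler constructed without a default run can still
        put and get datasets when collections are passed explicitly.
        """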

        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # Second time it will be allowed but indicate no-op
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should fail with CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to get the dataset without any collection raises a
        # CollectionError.
        self.assertFalse(butler.exists(datasetType, dataId))
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new collection should still leave it
        # findable in the original collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))


class ButlerTests(ButlerPutGetTests):
523 """Tests for Butler.""" 

524 

525 useTempRoot = True 

526 validationCanFail: bool 

527 fullConfigKey: str | None 

528 registryStr: str | None 

529 datastoreName: list[str] | None 

530 datastoreStr: list[str] 

531 

532 def setUp(self) -> None: 

533 """Create a new butler root for each test.""" 

534 self.root = makeTestTempDir(TESTDIR) 

535 Butler.makeRepo(self.root, config=Config(self.configFile)) 

536 self.tmpConfigFile = os.path.join(self.root, "butler.yaml") 

537 

538 def testConstructor(self) -> None: 

539 """Independent test of constructor.""" 

        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler.datastore, butler2.datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"s3://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), set(("label", "bad_label")))
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(Butler.get_repo_uri("missing", True), ResourcePath("missing"))
                    self.assertIn("not known to", str(cm.exception))
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label"))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())

    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")

    def testCompositePutGetConcrete(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")

    def testCompositePutGetVirtual(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler.datastore.isEphemeral:
            # Never disassemble in-memory datastore
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler.datastore.isEphemeral:
            # Never disassembled
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # Parameter on the write storage class should work regardless
        # of read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )

    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""

        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict and this should coerce to a MetricsExample
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the compatible-but-different dataset type
        # definition, this time passing the DatasetType and dataId rather
        # than a ref. This should be consistent with the ref-based get()
        # behavior and return the python type of the given DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")

    def testIngest(self) -> None:
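        """Test file ingest, including a multi-dataset single-file ingest
        and re-ingest of existing registry entries in execution-butler mode.
        """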

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single file ingest
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            )
            # Create a DatasetRef for ingest
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            # For first ingest use copy.
            butler.ingest(*datasets, transfer="copy", record_validation_info=False)

            # Now try to ingest again in "execution butler" mode where
            # the registry entries exist but the datastore does not have
            # the files. We also need to strip the dimension records to ensure
            # that they will be re-added by the ingest.
            ref = datasets[0].refs[0]
            datasets[0].refs = [
                cast(
                    DatasetRef,
                    butler.registry.findDataset(ref.datasetType, dataId=ref.dataId, collections=ref.run),
                )
                for ref in datasets[0].refs
            ]
            all_refs = []
            for dataset in datasets:
                refs = []
                for ref in dataset.refs:
                    # Create a dict from the dataId to drop the records.
                    new_data_id = {str(k): v for k, v in ref.dataId.items()}
                    new_ref = butler.registry.findDataset(ref.datasetType, new_data_id, collections=ref.run)
                    assert new_ref is not None
                    self.assertFalse(new_ref.dataId.hasRecords())
                    refs.append(new_ref)
                dataset.refs = refs
                all_refs.extend(dataset.refs)
            butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False)

            # Use move mode to test that the file is deleted. Also
            # disable recording of file size.
            butler.ingest(*datasets, transfer="move", record_validation_info=False)

            # Check that every ref now has records.
            for dataset in datasets:
                for ref in dataset.refs:
                    self.assertTrue(ref.dataId.hasRecords())

            # Ensure that the file has disappeared.
            self.assertFalse(tempFile.exists())

            # Check that the datastore recorded no file size.
            # Not all datastores can support this.
            try:
                infos = butler.datastore.getStoredItemsInfo(datasets[0].refs[0])  # type: ignore[attr-defined]
                self.assertEqual(infos[0].file_size, -1)
            except AttributeError:
                pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.exists(datasetTypeName, dataId1))
        self.assertTrue(butler.exists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets
        datasets = []
        butler.ingest(*datasets)

    def testPickle(self) -> None:
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)

    def testGetDatasetTypes(self) -> None:
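        """Test dataset type registration and queries, including component
        expansion, then validate the butler configuration against the
        registered dataset types.
        """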

        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry, entries are not created
        # for components, but querying can still return the components.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

    def testTransaction(self) -> None:
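        """Test that an exception raised inside a transaction rolls back
        both the dimension inserts and the dataset put made within it.
        """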

        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get via the resolved DatasetRef (the old getDirect path)
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for missing data ID value
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly that the Dataset entry is missing
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)

    def testMakeRepo(self) -> None:
1053 """Test that we can write butler configuration to a new repository via 

1054 the Butler.makeRepo interface and then instantiate a butler from the 

1055 repo root. 

1056 """ 

1057 # Do not run the test if we know this datastore configuration does 

1058 # not support a file system root 

1059 if self.fullConfigKey is None: 

1060 return 

1061 

1062 # create two separate directories 

1063 root1 = tempfile.mkdtemp(dir=self.root) 

1064 root2 = tempfile.mkdtemp(dir=self.root) 

1065 

1066 butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile)) 

1067 limited = Config(self.configFile) 

1068 butler1 = Butler(butlerConfig) 

1069 butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile)) 

1070 full = Config(self.tmpConfigFile) 

1071 butler2 = Butler(butlerConfig) 

1072 # Butlers should have the same configuration regardless of whether 

1073 # defaults were expanded. 

1074 self.assertEqual(butler1._config, butler2._config) 

1075 # Config files loaded directly should not be the same. 

1076 self.assertNotEqual(limited, full) 

1077 # Make sure "limited" doesn't have a few keys we know it should be 

1078 # inheriting from defaults. 

1079 self.assertIn(self.fullConfigKey, full) 

1080 self.assertNotIn(self.fullConfigKey, limited) 

1081 

1082 # Collections don't appear until something is put in them 

1083 collections1 = set(butler1.registry.queryCollections()) 

1084 self.assertEqual(collections1, set()) 

1085 self.assertEqual(set(butler2.registry.queryCollections()), collections1) 

1086 

1087 # Check that a config with no associated file name will not 

1088 # work properly with relocatable Butler repo 

1089 butlerConfig.configFile = None 

1090 with self.assertRaises(ValueError): 

1091 Butler(butlerConfig) 

1092 

1093 with self.assertRaises(FileExistsError): 

1094 Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False) 

1095 

1096 def testStringification(self) -> None: 

1097 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1098 butlerStr = str(butler) 

1099 

1100 if self.datastoreStr is not None: 

1101 for testStr in self.datastoreStr: 

1102 self.assertIn(testStr, butlerStr) 

1103 if self.registryStr is not None: 

1104 self.assertIn(self.registryStr, butlerStr) 

1105 

1106 datastoreName = butler.datastore.name 

1107 if self.datastoreName is not None: 

1108 for testStr in self.datastoreName: 

1109 self.assertIn(testStr, datastoreName) 

1110 

1111 def testButlerRewriteDataId(self) -> None: 

1112 """Test that dataIds can be rewritten based on dimension records.""" 

1113 

1114 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1115 

1116 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1117 datasetTypeName = "random_data" 

1118 

1119 # Create dimension records. 

        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.registry.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId
            self.assertEqual(ref.dataId["exposure"], i)

            # And check that we can get the dataset back with the same dataId
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)


class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool:
1170 """Checks if file exists at a given path (relative to root). 

1171 

1172 Test testPutTemplates verifies actual physical existance of the files 

1173 in the requested location. 

1174 """ 

1175 uri = ResourcePath(root, forceDirectory=True) 

1176 return uri.join(relpath).exists() 

1177 

1178 def testPutTemplates(self) -> None: 

1179 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template)
        dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions
        if hasattr(butler.datastore, "templates"):
            butler.datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.core.fileTemplates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)

    def testImportExport(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)

    def runImportExportTest(self, storageClass: StorageClass) -> None:
        """Run an export to a temporary directory and an import back into a
        new temporary repo. This test does not assume a POSIX datastore.
        """
        exportButler = self.runPutGetTest(storageClass, "test_metric")

        # Test that we must have a file extension.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump", directory=".") as export:
                pass

        # Test that unknown format is not allowed.
        with self.assertRaises(ValueError):
            with exportButler.export(filename="dump.fits", directory=".") as export:
                pass

        # Test that the repo actually has at least one dataset.
        datasets = list(exportButler.registry.queryDatasets(..., collections=...))
        self.assertGreater(len(datasets), 0)
        # Add a DimensionRecord that's unused by those datasets.
        skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")}
        exportButler.registry.insertDimensionData("skymap", skymapRecord)
        # Export and then import datasets.
        with safeTestTempDir(TESTDIR) as exportDir:
            exportFile = os.path.join(exportDir, "exports.yaml")
            with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export:
                export.saveDatasets(datasets)
                # Export the same datasets again. This should quietly do
                # nothing because of internal deduplication, and it shouldn't
                # complain about being asked to export the "htm7" elements even
                # though there aren't any in these datasets or in the database.
                export.saveDatasets(datasets, elements=["htm7"])
                # Save one of the data IDs again; this should be harmless
                # because of internal deduplication.
                export.saveDataIds([datasets[0].dataId])
                # Save some dimension records directly.
                export.saveDimensionData("skymap", [skymapRecord])
            self.assertTrue(os.path.exists(exportFile))
            with safeTestTempDir(TESTDIR) as importDir:
                # We always want this to be a local posix butler
                Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml")))
                # Calling script.butlerImport tests the implementation of the
                # butler command line interface "import" subcommand. Functions
                # in the script folder are generally considered protected and
                # should not be used as public api.
                with open(exportFile, "r") as f:
                    script.butlerImport(
                        importDir,
                        export_file=f,
                        directory=exportDir,
                        transfer="auto",
                        skip_dimensions=None,
                    )
                importButler = Butler(importDir, run=self.default_run)
                for ref in datasets:
                    with self.subTest(ref=ref):
                        # Test for existence by passing in the DatasetType and
                        # data ID separately, to avoid lookup by dataset_id.
                        self.assertTrue(importButler.exists(ref.datasetType, ref.dataId))
                self.assertEqual(
                    list(importButler.registry.queryDimensionRecords("skymap")),
                    [importButler.registry.dimensions["skymap"].RecordClass(**skymapRecord)],
                )

    def testRemoveRuns(self) -> None:
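        """Test removeRuns with and without unstore, checking the registry,
        datastore, and file artifacts afterwards.
        """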

1325 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1326 butler = Butler(self.tmpConfigFile, writeable=True) 

1327 # Load registry data with dimensions to hang datasets off of. 

1328 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1329 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1330 # Add some RUN-type collections. 

1331 run1 = "run1" 

1332 butler.registry.registerRun(run1) 

1333 run2 = "run2" 

1334 butler.registry.registerRun(run2) 

1335 # Put a dataset in each run. 

1336 metric = makeExampleMetrics() 

1337 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1338 datasetType = self.addDatasetType( 

1339 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1340 ) 

1341 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1342 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1343 uri1 = butler.getURI(ref1) 

1344 uri2 = butler.getURI(ref2) 

1345 

1346 with self.assertRaises(OrphanedRecordError): 

1347 butler.registry.removeDatasetType(datasetType.name) 

1348 

1349 # Remove from both runs with different values for unstore. 

1350 butler.removeRuns([run1], unstore=True) 

1351 butler.removeRuns([run2], unstore=False) 

1352 # Should be nothing in registry for either one, and datastore should 

1353 # not think either exists. 

1354 with self.assertRaises(MissingCollectionError): 

1355 butler.registry.getCollectionType(run1) 

1356 with self.assertRaises(MissingCollectionError): 

1357 butler.registry.getCollectionType(run2) 

1358 self.assertFalse(butler.datastore.exists(ref1)) 

1359 self.assertFalse(butler.datastore.exists(ref2)) 

1360 # The ref we unstored should be gone according to the URI, but the 

1361 # one we forgot should still be around. 

1362 self.assertFalse(uri1.exists()) 

1363 self.assertTrue(uri2.exists()) 

1364 

1365 # Now that the collections have been pruned, we can remove the 

1366 # dataset type. 

1367 butler.registry.removeDatasetType(datasetType.name) 

1368 

1369 with self.assertLogs("lsst.daf.butler.registries", "INFO") as cm: 

1370 butler.registry.removeDatasetType(("test*", "test*")) 

1371 self.assertIn("not defined", "\n".join(cm.output)) 

1372 

1373 
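# Illustrative sketch (not part of the original test suite) of the two
# removeRuns modes tested above: unstore=True deletes the underlying file
# artifacts, while unstore=False only forgets them, leaving the files on
# disk. The run names are placeholders.
def _sketch_remove_runs(butler: Butler) -> None:
    # Registry entries and file artifacts are both removed.
    butler.removeRuns(["run1"], unstore=True)
    # Registry entries are removed; the files are orphaned in place.
    butler.removeRuns(["run2"], unstore=False)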

1374class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1375 """PosixDatastore specialization of a butler""" 

1376 

1377 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1378 fullConfigKey: str | None = ".datastore.formatters" 

1379 validationCanFail = True 

1380 datastoreStr = ["/tmp"] 

1381 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1382 registryStr = "/gen3.sqlite3" 

1383 

1384 def testPathConstructor(self) -> None: 

1385 """Independent test of constructor using PathLike.""" 

1386 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1387 self.assertIsInstance(butler, Butler) 

1388 

1389 # And again with a Path object with the butler yaml 

1390 path = pathlib.Path(self.tmpConfigFile) 

1391 butler = Butler(path, writeable=False) 

1392 self.assertIsInstance(butler, Butler) 

1393 

1394 # And again with a Path object without the butler yaml 

1395 # (making sure we skip it if the tmp config doesn't end 

1396 # in butler.yaml -- which is the case for a subclass) 

1397 if self.tmpConfigFile.endswith("butler.yaml"): 

1398 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1399 butler = Butler(path, writeable=False) 

1400 self.assertIsInstance(butler, Butler) 

1401 

1402 def testExportTransferCopy(self) -> None: 

1403 """Test local export using all transfer modes""" 

1404 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1405 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1406 # Test that the repo actually has at least one dataset. 

1407 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1408 self.assertGreater(len(datasets), 0) 

1409 uris = [exportButler.getURI(d) for d in datasets] 

1410 assert isinstance(exportButler.datastore, FileDatastore) 

1411 datastoreRoot = exportButler.datastore.root 

1412 

1413 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1414 

1415 for path in pathsInStore: 

1416 # Assume local file system 

1417 assert path is not None 

1418 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1419 

1420 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1421 with safeTestTempDir(TESTDIR) as exportDir: 

1422 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1423 export.saveDatasets(datasets) 

1424 for path in pathsInStore: 

1425 assert path is not None 

1426 self.assertTrue( 

1427 self.checkFileExists(exportDir, path), 

1428 f"Check that mode {transfer} exported files", 

1429 ) 

1430 
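# Illustrative sketch (not part of the original test suite): exporting the
# same datasets once per local transfer mode, mirroring the loop in
# testExportTransferCopy above. "copy" duplicates each artifact; the link
# modes create hard or symbolic links. Names are placeholders.
def _sketch_export_transfer_modes(butler: Butler, refs: list[DatasetRef], export_root: str) -> None:
    for transfer in ("copy", "link", "symlink", "relsymlink"):
        export_dir = os.path.join(export_root, transfer)
        os.makedirs(export_dir, exist_ok=True)
        with butler.export(directory=export_dir, format="yaml", transfer=transfer) as export:
            export.saveDatasets(refs)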

1431 def testPruneDatasets(self) -> None: 

1432 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1433 butler = Butler(self.tmpConfigFile, writeable=True) 

1434 assert isinstance(butler.datastore, FileDatastore) 

1435 # Load registry data with dimensions to hang datasets off of. 

1436 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1437 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1438 # Add some RUN-type collections. 

1439 run1 = "run1" 

1440 butler.registry.registerRun(run1) 

1441 run2 = "run2" 

1442 butler.registry.registerRun(run2) 

1443 # put some datasets. ref1 and ref2 have the same data ID, and are in 

1444 # different runs. ref3 has a different data ID. 

1445 metric = makeExampleMetrics() 

1446 dimensions = butler.registry.dimensions.extract(["instrument", "physical_filter"]) 

1447 datasetType = self.addDatasetType( 

1448 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1449 ) 

1450 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1451 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1452 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1453 

1454 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1455 for ref, stored in many_stored.items(): 

1456 self.assertTrue(stored, f"Ref {ref} should be stored") 

1457 

1458 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1459 for ref, exists in many_exists.items(): 

1460 self.assertTrue(exists, f"Checking ref {ref} exists.") 

1461 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored") 

1462 

1463 # Simple prune. 

1464 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1465 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1)) 

1466 

1467 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1468 for ref, stored in many_stored.items(): 

1469 self.assertFalse(stored, f"Ref {ref} should not be stored") 

1470 

1471 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1472 for ref, exists in many_exists.items(): 

1473 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored") 

1474 

1475 # Put data back. 

1476 ref1_new = butler.put(metric, ref1) 

1477 self.assertEqual(ref1_new, ref1) # Reuses original ID. 

1478 ref2 = butler.put(metric, ref2) 

1479 

1480 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1481 self.assertTrue(many_stored[ref1]) 

1482 self.assertTrue(many_stored[ref2]) 

1483 self.assertFalse(many_stored[ref3]) 

1484 

1485 ref3 = butler.put(metric, ref3) 

1486 

1487 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1488 for ref, exists in many_exists.items(): 

1489 self.assertTrue(exists, f"Ref {ref} should be stored") 

1490 

1491 # Clear out the datasets from registry and start again. 

1492 refs = [ref1, ref2, ref3] 

1493 butler.pruneDatasets(refs, purge=True, unstore=True) 

1494 for ref in refs: 

1495 butler.put(metric, ref) 

1496 

1497 # Test different forms of file availability. 

1498 # Need to be in a state where: 

1499 # - one ref just has registry record. 

1500 # - one ref has a missing file but a datastore record. 

1501 # - one ref has a missing datastore record but file is there. 

1502 # - one ref does not exist anywhere. 

1503 # Do not need to test a ref that has everything since that is tested 

1504 # above. 

1505 ref0 = DatasetRef( 

1506 datasetType, 

1507 DataCoordinate.standardize( 

1508 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions 

1509 ), 

1510 run=run1, 

1511 ) 

1512 

1513 # Delete from datastore and retain in Registry. 

1514 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False) 

1515 

1516 # File has been removed. 

1517 uri2 = butler.datastore.getURI(ref2) 

1518 uri2.remove() 

1519 

1520 # Datastore has lost track. 

1521 butler.datastore.forget([ref3]) 

1522 

1523 # First test with a standard butler. 

1524 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1525 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1526 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1527 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1528 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED) 

1529 

1530 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False) 

1531 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1532 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1533 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN) 

1534 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1535 self.assertTrue(exists_many[ref2]) 

1536 

1537 # Check that per-ref query gives the same answer as many query. 

1538 for ref, exists in exists_many.items(): 

1539 self.assertEqual(butler.exists(ref, full_check=False), exists) 

1540 

1541 # Test again with a trusting butler. 

1542 butler.datastore.trustGetRequest = True 

1543 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1544 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1545 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1546 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1547 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT) 

1548 

1549 # Check that per-ref query gives the same answer as many query. 

1550 for ref, exists in exists_many.items(): 

1551 self.assertEqual(butler.exists(ref, full_check=True), exists) 

1552 

1553 # Create a ref that surprisingly has the UUID of an existing ref 

1554 # but is not the same. 

1555 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id) 

1556 with self.assertRaises(ValueError): 

1557 butler.exists(ref_bad) 

1558 

1559 # Create a ref that has a compatible storage class. 

1560 ref_compat = ref2.overrideStorageClass("StructuredDataDict") 

1561 exists = butler.exists(ref_compat) 

1562 self.assertEqual(exists, exists_many[ref2]) 

1563 

1564 # Remove everything and start from scratch. 

1565 butler.datastore.trustGetRequest = False 

1566 butler.pruneDatasets(refs, purge=True, unstore=True) 

1567 for ref in refs: 

1568 butler.put(metric, ref) 

1569 

1570 # These tests mess directly with the trash table and can leave the 

1571 # datastore in an odd state. Do them at the end. 

1572 # Check that in normal mode, deleting the datastore record means 

1573 # that emptying the trash will not touch the file. 

1574 uri1 = butler.datastore.getURI(ref1) 

1575 butler.datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1576 butler.datastore.forget([ref1]) 

1577 butler.datastore.trash(ref1) 

1578 butler.datastore.emptyTrash() 

1579 self.assertTrue(uri1.exists()) 

1580 uri1.remove() # Clean it up. 

1581 

1582 # Simulate execution butler setup by deleting the datastore 

1583 # record but keeping the file around and trusting. 

1584 butler.datastore.trustGetRequest = True 

1585 uri2 = butler.datastore.getURI(ref2) 

1586 uri3 = butler.datastore.getURI(ref3) 

1587 self.assertTrue(uri2.exists()) 

1588 self.assertTrue(uri3.exists()) 

1589 

1590 # Remove the datastore record. 

1591 butler.datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1592 butler.datastore.forget([ref2]) 

1593 self.assertTrue(uri2.exists()) 

1594 butler.datastore.trash([ref2, ref3]) 

1595 # Immediate removal for ref2 file 

1596 self.assertFalse(uri2.exists()) 

1597 # But ref3 has to wait for the empty. 

1598 self.assertTrue(uri3.exists()) 

1599 butler.datastore.emptyTrash() 

1600 self.assertFalse(uri3.exists()) 

1601 

1602 # Clear out the datasets from registry. 

1603 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1604 
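# Illustrative sketch (not part of the original test suite): interpreting
# the DatasetExistence flags asserted in testPruneDatasets above. The
# helper name is a placeholder.
def _sketch_classify_existence(butler: Butler, ref: DatasetRef) -> str:
    exists = butler.exists(ref, full_check=True)
    if exists == DatasetExistence.UNRECOGNIZED:
        return "unknown to both registry and datastore"
    if exists == DatasetExistence.VERIFIED:
        return "registry record, datastore record, and artifact all present"
    if exists & DatasetExistence.RECORDED and not exists & DatasetExistence.DATASTORE:
        return "registry record only; the datastore has lost track"
    return "partially present"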

1605 def testPytypeCoercion(self) -> None: 

1606 """Test python type coercion on Butler.get and put.""" 

1607 

1608 # Store some data with the normal example storage class. 

1609 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1610 datasetTypeName = "test_metric" 

1611 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1612 

1613 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1614 metric = butler.get(datasetTypeName, dataId=dataId) 

1615 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1616 

1617 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1618 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1619 

1620 # Now need to hack the registry dataset type definition. 

1621 # There is no API for this. 

1622 assert isinstance(butler.registry, SqlRegistry) 

1623 manager = butler.registry._managers.datasets 

1624 assert hasattr(manager, "_db") and hasattr(manager, "_static") 

1625 manager._db.update( 

1626 manager._static.dataset_type, 

1627 {"name": datasetTypeName}, 

1628 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1629 ) 

1630 

1631 # Force reset of dataset type cache 

1632 butler.registry.refresh() 

1633 

1634 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1635 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1636 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1637 

1638 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1639 self.assertNotEqual(type(metric_model), type(metric)) 

1640 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1641 

1642 # Put the model and read it back to show that everything now 

1643 # works as normal. 

1644 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1645 metric_model_new = butler.get(metric_ref) 

1646 self.assertEqual(metric_model_new, metric_model) 

1647 

1648 # Hack the storage class again to something that will make the 

1649 # get fail because no conversion is possible. 

1650 manager._db.update( 

1651 manager._static.dataset_type, 

1652 {"name": datasetTypeName}, 

1653 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1654 ) 

1655 butler.registry.refresh() 

1656 

1657 with self.assertRaises(ValueError): 

1658 butler.get(datasetTypeName, dataId=dataId) 

1659 

1660 
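# Illustrative sketch (not part of the original test suite): the behaviour
# testPytypeCoercion relies on -- the python type returned by get() follows
# whatever storage class the registry currently associates with the dataset
# type. Names are placeholders.
def _sketch_pytype_follows_registry(butler: Butler, dataset_type_name: str, data_id: dict) -> str:
    obj = butler.get(dataset_type_name, dataId=data_id)
    # E.g. "lsst.daf.butler.tests.MetricsExample" before the storage class
    # is redefined, "...MetricsExampleModel" afterwards.
    return get_full_type_name(obj)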

1661@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1662class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1663 """PosixDatastore specialization of a butler using Postgres""" 

1664 

1665 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1666 fullConfigKey = ".datastore.formatters" 

1667 validationCanFail = True 

1668 datastoreStr = ["/tmp"] 

1669 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1670 registryStr = "PostgreSQL@test" 

1671 postgresql: Any 

1672 

1673 @staticmethod 

1674 def _handler(postgresql: Any) -> None: 

1675 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1676 with engine.begin() as connection: 

1677 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1678 

1679 @classmethod 

1680 def setUpClass(cls) -> None: 

1681 # Create the postgres test server. 

1682 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1683 cache_initialized_db=True, on_initialized=cls._handler 

1684 ) 

1685 super().setUpClass() 

1686 

1687 @classmethod 

1688 def tearDownClass(cls) -> None: 

1689 # Clean up any lingering SQLAlchemy engines/connections 

1690 # so they're closed before we shut down the server. 

1691 gc.collect() 

1692 cls.postgresql.clear_cache() 

1693 super().tearDownClass() 

1694 

1695 def setUp(self) -> None: 

1696 self.server = self.postgresql() 

1697 

1698 # Need to add a registry section to the config. 

1699 self._temp_config = False 

1700 config = Config(self.configFile) 

1701 config["registry", "db"] = self.server.url() 

1702 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1703 config.dump(fh) 

1704 self.configFile = fh.name 

1705 self._temp_config = True 

1706 super().setUp() 

1707 

1708 def tearDown(self) -> None: 

1709 self.server.stop() 

1710 if self._temp_config and os.path.exists(self.configFile): 

1711 os.remove(self.configFile) 

1712 super().tearDown() 

1713 

1714 def testMakeRepo(self) -> None: 

1715 # The base class test assumes that it's using sqlite and assumes 

1716 # the config file is acceptable to sqlite. 

1717 raise unittest.SkipTest("Postgres config is not compatible with this test.") 

1718 

1719 
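# Illustrative sketch (not part of the original test suite): the
# testing.postgresql pattern used by the class above. A factory caches an
# initialized database so per-test servers start quickly, and the server
# URL is injected into the butler config. Names are placeholders; the
# caller is responsible for server.stop().
def _sketch_postgres_butler_config(config_file: str) -> Config:
    factory = testing.postgresql.PostgresqlFactory(cache_initialized_db=True)
    server = factory()  # Start a throwaway postgres server.
    config = Config(config_file)
    config["registry", "db"] = server.url()
    return config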

1720class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1721 """InMemoryDatastore specialization of a butler""" 

1722 

1723 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1724 fullConfigKey = None 

1725 useTempRoot = False 

1726 validationCanFail = False 

1727 datastoreStr = ["datastore='InMemory"] 

1728 datastoreName = ["InMemoryDatastore@"] 

1729 registryStr = "/gen3.sqlite3" 

1730 

1731 def testIngest(self) -> None: 

1732 pass 

1733 

1734 

1735class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1736 """PosixDatastore specialization""" 

1737 

1738 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1739 fullConfigKey = ".datastore.datastores.1.formatters" 

1740 validationCanFail = True 

1741 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1742 datastoreName = [ 

1743 "InMemoryDatastore@", 

1744 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1745 "SecondDatastore", 

1746 ] 

1747 registryStr = "/gen3.sqlite3" 

1748 

1749 

1750class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1751 """Test that a yaml file in one location can refer to a root in another.""" 

1752 

1753 datastoreStr = ["dir1"] 

1754 # Disable the makeRepo test since we are deliberately not using 

1755 # butler.yaml as the config name. 

1756 fullConfigKey = None 

1757 

1758 def setUp(self) -> None: 

1759 self.root = makeTestTempDir(TESTDIR) 

1760 

1761 # Make a new repository in one place 

1762 self.dir1 = os.path.join(self.root, "dir1") 

1763 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1764 

1765 # Move the yaml file to a different place and add a "root" 

1766 self.dir2 = os.path.join(self.root, "dir2") 

1767 os.makedirs(self.dir2, exist_ok=True) 

1768 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1769 config = Config(configFile1) 

1770 config["root"] = self.dir1 

1771 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1772 config.dumpToUri(configFile2) 

1773 os.remove(configFile1) 

1774 self.tmpConfigFile = configFile2 

1775 

1776 def testFileLocations(self) -> None: 

1777 self.assertNotEqual(self.dir1, self.dir2) 

1778 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1779 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1780 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 

1781 

1782 

1783class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1784 """Test that a config file created by makeRepo outside of repo works.""" 

1785 

1786 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1787 

1788 def setUp(self) -> None: 

1789 self.root = makeTestTempDir(TESTDIR) 

1790 self.root2 = makeTestTempDir(TESTDIR) 

1791 

1792 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1793 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1794 

1795 def tearDown(self) -> None: 

1796 if os.path.exists(self.root2): 

1797 shutil.rmtree(self.root2, ignore_errors=True) 

1798 super().tearDown() 

1799 

1800 def testConfigExistence(self) -> None: 

1801 c = Config(self.tmpConfigFile) 

1802 uri_config = ResourcePath(c["root"]) 

1803 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1804 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1805 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1806 

1807 def testPutGet(self) -> None: 

1808 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1809 self.runPutGetTest(storageClass, "test_metric") 

1810 

1811 

1812class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1813 """Test that a config file created by makeRepo outside of repo works.""" 

1814 

1815 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1816 

1817 def setUp(self) -> None: 

1818 self.root = makeTestTempDir(TESTDIR) 

1819 self.root2 = makeTestTempDir(TESTDIR) 

1820 

1821 self.tmpConfigFile = self.root2 

1822 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1823 

1824 def testConfigExistence(self) -> None: 

1825 # Append the yaml file else Config constructor does not know the file 

1826 # type. 

1827 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1828 super().testConfigExistence() 

1829 

1830 

1831class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1832 """Test that a config file created by makeRepo outside of repo works.""" 

1833 

1834 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1835 

1836 def setUp(self) -> None: 

1837 self.root = makeTestTempDir(TESTDIR) 

1838 self.root2 = makeTestTempDir(TESTDIR) 

1839 

1840 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1841 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1842 

1843 

1844@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1845class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1846 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1847 a local in-memory SqlRegistry. 

1848 """ 

1849 

1850 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1851 fullConfigKey = None 

1852 validationCanFail = True 

1853 

1854 bucketName = "anybucketname" 

1855 """Name of the Bucket that will be used in the tests. The name is read from 

1856 the config file used with the tests during set-up. 

1857 """ 

1858 

1859 root = "butlerRoot/" 

1860 """Root repository directory expected to be used in case useTempRoot=False. 

1861 Otherwise the root is set to a 20 characters long randomly generated string 

1862 during set-up. 

1863 """ 

1864 

1865 datastoreStr = [f"datastore={root}"] 

1866 """Contains all expected root locations in a format expected to be 

1867 returned by Butler stringification. 

1868 """ 

1869 

1870 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1871 """The expected format of the S3 Datastore string.""" 

1872 

1873 registryStr = "/gen3.sqlite3" 

1874 """Expected format of the Registry string.""" 

1875 

1876 mock_s3 = mock_s3() 

1877 """The mocked s3 interface from moto.""" 

1878 

1879 def genRoot(self) -> str: 

1880 """Returns a random string of len 20 to serve as a root 

1881 name for the temporary bucket repo. 

1882 

1883 This is equivalent to tempfile.mkdtemp as this is what self.root 

1884 becomes when useTempRoot is True. 

1885 """ 

1886 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1887 return rndstr + "/" 

1888 

1889 def setUp(self) -> None: 

1890 config = Config(self.configFile) 

1891 uri = ResourcePath(config[".datastore.datastore.root"]) 

1892 self.bucketName = uri.netloc 

1893 

1894 # Enable S3 mocking of tests. 

1895 self.mock_s3.start() 

1896 

1897 # Set up some fake credentials if they do not exist. 

1898 self.usingDummyCredentials = setAwsEnvCredentials() 

1899 

1900 if self.useTempRoot: 

1901 self.root = self.genRoot() 

1902 rooturi = f"s3://{self.bucketName}/{self.root}" 

1903 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1904 

1905 # Need a local folder to store the registry database. 

1906 self.reg_dir = makeTestTempDir(TESTDIR) 

1907 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1908 

1909 # Moto needs to know that we expect the bucket to exist 

1910 # (its name used to be the class attribute bucketName). 

1911 s3 = boto3.resource("s3") 

1912 s3.create_bucket(Bucket=self.bucketName) 

1913 

1914 self.datastoreStr = [f"datastore='{rooturi}'"] 

1915 self.datastoreName = [f"FileDatastore@{rooturi}"] 

1916 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

1917 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

1918 

1919 def tearDown(self) -> None: 

1920 s3 = boto3.resource("s3") 

1921 bucket = s3.Bucket(self.bucketName) 

1922 try: 

1923 bucket.objects.all().delete() 

1924 except botocore.exceptions.ClientError as e: 

1925 if e.response["Error"]["Code"] == "404": 

1926 # the key was not reachable - pass 

1927 pass 

1928 else: 

1929 raise 

1930 

1931 bucket = s3.Bucket(self.bucketName) 

1932 bucket.delete() 

1933 

1934 # Stop the S3 mock. 

1935 self.mock_s3.stop() 

1936 

1937 # unset any potentially set dummy credentials 

1938 if self.usingDummyCredentials: 

1939 unsetAwsEnvCredentials() 

1940 

1941 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

1942 shutil.rmtree(self.reg_dir, ignore_errors=True) 

1943 

1944 if self.useTempRoot and os.path.exists(self.root): 

1945 shutil.rmtree(self.root, ignore_errors=True) 

1946 

1947 super().tearDown() 

1948 

1949 
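# Illustrative sketch (not part of the original test suite): the moto
# mocking pattern used by the S3 test case above. Everything runs against
# an in-process fake S3, so no real credentials or buckets are touched.
# Names are placeholders; the caller must later call mock.stop().
def _sketch_mocked_s3_repo(bucket_name: str, config: Config) -> str:
    mock = mock_s3()
    mock.start()
    setAwsEnvCredentials()  # Dummy credentials so boto3 is satisfied.
    boto3.resource("s3").create_bucket(Bucket=bucket_name)
    root_uri = f"s3://{bucket_name}/butlerRoot/"
    Butler.makeRepo(root_uri, config=config, forceConfigRoot=False)
    return posixpath.join(root_uri, "butler.yaml")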

1950class PosixDatastoreTransfers(unittest.TestCase): 

1951 """Test data transfers between butlers. 

1952 

1953 Tests cover different dataset managers: UUID to UUID and integer to 

1954 integer. UUID to integer is not supported since we do not currently 

1955 want to allow that. Integer to UUID is supported, with the caveat 

1956 that a UUID4 will be generated, which will be incorrect for raw 

1957 dataset types. The tests ignore that. 

1958 """ 

1959 

1960 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1961 storageClassFactory: StorageClassFactory 

1962 

1963 @classmethod 

1964 def setUpClass(cls) -> None: 

1965 cls.storageClassFactory = StorageClassFactory() 

1966 cls.storageClassFactory.addFromConfig(cls.configFile) 

1967 

1968 def setUp(self) -> None: 

1969 self.root = makeTestTempDir(TESTDIR) 

1970 self.config = Config(self.configFile) 

1971 

1972 def tearDown(self) -> None: 

1973 removeTestTempDir(self.root) 

1974 

1975 def create_butler(self, manager: str, label: str) -> Butler: 

1976 config = Config(self.configFile) 

1977 config["registry", "managers", "datasets"] = manager 

1978 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

1979 

1980 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None: 

1981 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID" 

1982 if manager1 is None: 

1983 manager1 = default 

1984 if manager2 is None: 

1985 manager2 = default 

1986 self.source_butler = self.create_butler(manager1, "1") 

1987 self.target_butler = self.create_butler(manager2, "2") 

1988 

1989 def testTransferUuidToUuid(self) -> None: 

1990 self.create_butlers() 

1991 self.assertButlerTransfers() 

1992 

1993 def _enable_trust(self, datastore: Datastore) -> None: 

1994 if hasattr(datastore, "trustGetRequest"): 

1995 datastore.trustGetRequest = True 

1996 elif hasattr(datastore, "datastores"): 

1997 for datastore in datastore.datastores: 

1998 if hasattr(datastore, "trustGetRequest"): 

1999 datastore.trustGetRequest = True 

2000 

2001 def testTransferMissing(self) -> None: 

2002 """Test transfers where datastore records are missing. 

2003 

2004 This is how execution butler works. 

2005 """ 

2006 self.create_butlers() 

2007 

2008 # Configure the source butler to allow trust. 

2009 self._enable_trust(self.source_butler.datastore) 

2010 

2011 self.assertButlerTransfers(purge=True) 

2012 

2013 def testTransferMissingDisassembly(self) -> None: 

2014 """Test transfers where datastore records are missing. 

2015 

2016 This is how execution butler works. 

2017 """ 

2018 self.create_butlers() 

2019 

2020 # Configure the source butler to allow trust. 

2021 self._enable_trust(self.source_butler.datastore) 

2022 

2023 # Test disassembly. 

2024 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2025 

2026 def testAbsoluteURITransferDirect(self) -> None: 

2027 """Test transfer using an absolute URI.""" 

2028 self._absolute_transfer("auto") 

2029 

2030 def testAbsoluteURITransferCopy(self) -> None: 

2031 """Test transfer using an absolute URI.""" 

2032 self._absolute_transfer("copy") 

2033 

2034 def _absolute_transfer(self, transfer: str) -> None: 

2035 self.create_butlers() 

2036 

2037 storageClassName = "StructuredData" 

2038 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2039 datasetTypeName = "random_data" 

2040 run = "run1" 

2041 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2042 

2043 dimensions = self.source_butler.registry.dimensions.extract(()) 

2044 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2045 self.source_butler.registry.registerDatasetType(datasetType) 

2046 

2047 metrics = makeExampleMetrics() 

2048 with ResourcePath.temporary_uri(suffix=".json") as temp: 

2049 dataId = DataCoordinate.makeEmpty(self.source_butler.dimensions) 

2050 source_refs = [DatasetRef(datasetType, dataId, run=run)] 

2051 temp.write(json.dumps(metrics.exportAsDict()).encode()) 

2052 dataset = FileDataset(path=temp, refs=source_refs) 

2053 self.source_butler.ingest(dataset, transfer="direct") 

2054 

2055 self.target_butler.transfer_from( 

2056 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer 

2057 ) 

2058 

2059 uri = self.target_butler.getURI(dataset.refs[0]) 

2060 if transfer == "auto": 

2061 self.assertEqual(uri, temp) 

2062 else: 

2063 self.assertNotEqual(uri, temp) 

2064 
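# Illustrative sketch (not part of the original test suite): the high-level
# transfer_from call exercised throughout this class, with dataset types
# and dimension records created in the target on demand. Names are
# placeholders.
def _sketch_transfer_between_butlers(source: Butler, target: Butler, refs: list[DatasetRef]) -> int:
    transferred = target.transfer_from(
        source,
        refs,
        transfer="auto",
        register_dataset_types=True,
        transfer_dimensions=True,
    )
    return len(transferred)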

2065 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None: 

2066 """Test that a run can be transferred to another butler.""" 

2067 

2068 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2069 datasetTypeName = "random_data" 

2070 

2071 # Test will create 3 collections and we will want to transfer 

2072 # two of those three. 

2073 runs = ["run1", "run2", "other"] 

2074 

2075 # Also want to use two different dataset types to ensure that 

2076 # grouping works. 

2077 datasetTypeNames = ["random_data", "random_data_2"] 

2078 

2079 # Create the run collections in the source butler. 

2080 for run in runs: 

2081 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2082 

2083 # Create dimensions in source butler. 

2084 n_exposures = 30 

2085 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2086 self.source_butler.registry.insertDimensionData( 

2087 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2088 ) 

2089 self.source_butler.registry.insertDimensionData( 

2090 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2091 ) 

2092 

2093 for i in range(n_exposures): 

2094 self.source_butler.registry.insertDimensionData( 

2095 "exposure", 

2096 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2097 ) 

2098 

2099 # Create dataset types in the source butler. 

2100 dimensions = self.source_butler.registry.dimensions.extract(["instrument", "exposure"]) 

2101 for datasetTypeName in datasetTypeNames: 

2102 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2103 self.source_butler.registry.registerDatasetType(datasetType) 

2104 

2105 # Write a dataset to an unrelated run -- this will ensure that 

2106 # we are rewriting integer dataset IDs in the target if necessary. 

2107 # Will not be relevant for UUID. 

2108 run = "distraction" 

2109 butler = Butler(butler=self.source_butler, run=run) 

2110 butler.put( 

2111 makeExampleMetrics(), 

2112 datasetTypeName, 

2113 exposure=1, 

2114 instrument="DummyCamComp", 

2115 physical_filter="d-r", 

2116 ) 

2117 

2118 # Write some example metrics to the source 

2119 butler = Butler(butler=self.source_butler) 

2120 

2121 # Set of DatasetRefs that should be in the list of refs to transfer 

2122 # but which will not be transferred. 

2123 deleted: set[DatasetRef] = set() 

2124 

2125 n_expected = 20 # Number of datasets expected to be transferred 

2126 source_refs = [] 

2127 for i in range(n_exposures): 

2128 # Put a third of datasets into each collection, only retain 

2129 # two thirds. 

2130 index = i % 3 

2131 run = runs[index] 

2132 datasetTypeName = datasetTypeNames[i % 2] 

2133 

2134 metric = MetricsExample( 

2135 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)] 

2136 ) 

2137 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2138 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2139 

2140 # Remove the datastore record using low-level API 

2141 if purge: 

2142 # Remove records for a fraction. 

2143 if index == 1: 

2144 # For one of these delete the file as well. 

2145 # This allows the "missing" code to filter the 

2146 # file out. 

2147 # Access the individual datastores. 

2148 datastores = [] 

2149 if hasattr(butler.datastore, "datastores"): 

2150 datastores.extend(butler.datastore.datastores) 

2151 else: 

2152 datastores.append(butler.datastore) 

2153 

2154 if not deleted: 

2155 # For a chained datastore we need to remove 

2156 # files in each chain. 

2157 for datastore in datastores: 

2158 # The file might not be known to the datastore 

2159 # if constraints are used. 

2160 try: 

2161 primary, uris = datastore.getURIs(ref) 

2162 except FileNotFoundError: 

2163 continue 

2164 if primary: 

2165 if primary.scheme != "mem": 

2166 primary.remove() 

2167 for uri in uris.values(): 

2168 if uri.scheme != "mem": 

2169 uri.remove() 

2170 n_expected -= 1 

2171 deleted.add(ref) 

2172 

2173 # Remove the datastore record. 

2174 for datastore in datastores: 

2175 if hasattr(datastore, "removeStoredItemInfo"): 

2176 datastore.removeStoredItemInfo(ref) 

2177 

2178 if index < 2: 

2179 source_refs.append(ref) 

2180 if ref not in deleted: 

2181 new_metric = butler.get(ref) 

2182 self.assertEqual(new_metric, metric) 

2183 

2184 # Create some bad dataset types to ensure we check for inconsistent 

2185 # definitions. 

2186 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2187 for datasetTypeName in datasetTypeNames: 

2188 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2189 self.target_butler.registry.registerDatasetType(datasetType) 

2190 with self.assertRaises(ConflictingDefinitionError) as cm: 

2191 self.target_butler.transfer_from(self.source_butler, source_refs) 

2192 self.assertIn("dataset type differs", str(cm.exception)) 

2193 

2194 # And remove the bad definitions. 

2195 for datasetTypeName in datasetTypeNames: 

2196 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2197 

2198 # Transfer without creating dataset types should fail. 

2199 with self.assertRaises(KeyError): 

2200 self.target_butler.transfer_from(self.source_butler, source_refs) 

2201 

2202 # Transfer without creating dimensions should fail. 

2203 with self.assertRaises(ConflictingDefinitionError) as cm: 

2204 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True) 

2205 self.assertIn("dimension", str(cm.exception)) 

2206 

2207 # The failed transfer above leaves the registry in an inconsistent 

2208 # state because the run is created but then rolled back without 

2209 # the collection cache being cleared. For now force a refresh. 

2210 # Can remove with DM-35498. 

2211 self.target_butler.registry.refresh() 

2212 

2213 # Now transfer them to the second butler, including dimensions. 

2214 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2215 transferred = self.target_butler.transfer_from( 

2216 self.source_butler, 

2217 source_refs, 

2218 register_dataset_types=True, 

2219 transfer_dimensions=True, 

2220 ) 

2221 self.assertEqual(len(transferred), n_expected) 

2222 log_output = ";".join(log_cm.output) 

2223 

2224 # A ChainedDatastore will use the in-memory datastore for mexists 

2225 # so we can not rely on the mexists log message. 

2226 self.assertIn("Number of datastore records found in source", log_output) 

2227 self.assertIn("Creating output run", log_output) 

2228 

2229 # Do the transfer twice to ensure that it will do nothing extra. 

2230 # Only do this if purge=True because it does not work for int 

2231 # dataset_id. 

2232 if purge: 

2233 # This should not need to register dataset types. 

2234 transferred = self.target_butler.transfer_from(self.source_butler, source_refs) 

2235 self.assertEqual(len(transferred), n_expected) 

2236 

2237 # Also do an explicit low-level transfer to trigger some 

2238 # edge cases. 

2239 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2240 self.target_butler.datastore.transfer_from(self.source_butler.datastore, source_refs) 

2241 log_output = ";".join(log_cm.output) 

2242 self.assertIn("no file artifacts exist", log_output) 

2243 

2244 with self.assertRaises((TypeError, AttributeError)): 

2245 self.target_butler.datastore.transfer_from(self.source_butler, source_refs) # type: ignore 

2246 

2247 with self.assertRaises(ValueError): 

2248 self.target_butler.datastore.transfer_from( 

2249 self.source_butler.datastore, source_refs, transfer="split" 

2250 ) 

2251 

2252 # Now try to get the same refs from the new butler. 

2253 for ref in source_refs: 

2254 if ref not in deleted: 

2255 new_metric = self.target_butler.get(ref) 

2256 old_metric = self.source_butler.get(ref) 

2257 self.assertEqual(new_metric, old_metric) 

2258 

2259 # Now prune run2 collection and create instead a CHAINED collection. 

2260 # This should block the transfer. 

2261 self.target_butler.removeRuns(["run2"], unstore=True) 

2262 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2263 with self.assertRaises(CollectionTypeError): 

2264 # Re-importing the run1 datasets can be problematic if they 

2265 # use integer IDs so filter those out. 

2266 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2267 self.target_butler.transfer_from(self.source_butler, to_transfer) 

2268 

2269 

2270class ChainedDatastoreTransfers(PosixDatastoreTransfers): 

2271 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2272 

2273 

2274if __name__ == "__main__": 

2275 unittest.main()