Coverage for tests/test_butler.py: 13%, 1306 statements (coverage.py v7.3.2, created at 2023-10-12 09:44 +0000)


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Tests for Butler."""

from __future__ import annotations

import gc
import json
import logging
import os
import pathlib
import pickle
import posixpath
import random
import shutil
import string
import tempfile
import unittest
import unittest.mock  # added: testConstructor below uses unittest.mock.patch.dict
import uuid
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, cast

try:
    import boto3
    import botocore
    from lsst.resources.s3utils import setAwsEnvCredentials, unsetAwsEnvCredentials
    from moto import mock_s3  # type: ignore[import]
except ImportError:
    boto3 = None


    def mock_s3(*args: Any, **kwargs: Any) -> Any:  # type: ignore[no-untyped-def]
        """No-op decorator for use when moto's mock_s3 cannot be imported."""
        return None
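    # Applying this stand-in as a decorator replaces the decorated test
    # class with ``None``; the S3-backed tests are presumably skipped
    # elsewhere via a ``boto3 is None`` check when moto is unavailable.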



try:
    # It's possible but silly to have testing.postgresql installed without
    # having the postgresql server installed (because then nothing in
    # testing.postgresql would work), so we use the presence of that module
    # to test whether we can expect the server to be available.
    import testing.postgresql  # type: ignore[import]
except ImportError:
    testing = None

import astropy.time
import sqlalchemy
from lsst.daf.butler import (
    Butler,
    ButlerConfig,
    ButlerRepoIndex,
    CollectionType,
    Config,
    DataCoordinate,
    DatasetExistence,
    DatasetRef,
    DatasetType,
    FileDataset,
    StorageClassFactory,
    ValidationError,
    script,
)
from lsst.daf.butler.datastore import NullDatastore
from lsst.daf.butler.datastore.file_templates import FileTemplate, FileTemplateValidationError
from lsst.daf.butler.datastores.fileDatastore import FileDatastore
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import (
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    MissingCollectionError,
    OrphanedRecordError,
)
from lsst.daf.butler.repo_relocation import BUTLER_ROOT_TAG
from lsst.daf.butler.tests import MetricsExample, MultiDetectorFormatter
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir, safeTestTempDir
from lsst.resources import ResourcePath
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

if TYPE_CHECKING:
    import types

    from lsst.daf.butler import Datastore, DimensionGraph, Registry, StorageClass

TESTDIR = os.path.abspath(os.path.dirname(__file__))



def clean_environment() -> None:
    """Remove external environment variables that affect the tests."""
    for k in (
        "DAF_BUTLER_REPOSITORY_INDEX",
        "S3_ENDPOINT_URL",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_SHARED_CREDENTIALS_FILE",
    ):
        os.environ.pop(k, None)



def makeExampleMetrics() -> MetricsExample:
    """Return an example dataset suitable for tests."""

    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        [563, 234, 456.7, 752, 8, 9, 27],
    )



class TransactionTestError(Exception):
    """Specific error for testing transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass



class ButlerConfigTests(unittest.TestCase):
    """Simple tests of ButlerConfig behavior not covered by the other test
    cases.
    """

    def testSearchPath(self) -> None:
        configFile = os.path.join(TESTDIR, "config", "basic", "butler.yaml")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config1 = ButlerConfig(configFile)
        self.assertNotIn("testConfigs", "\n".join(cm.output))

        overrideDirectory = os.path.join(TESTDIR, "config", "testConfigs")
        with self.assertLogs("lsst.daf.butler", level="DEBUG") as cm:
            config2 = ButlerConfig(configFile, searchPaths=[overrideDirectory])
        self.assertIn("testConfigs", "\n".join(cm.output))

        key = ("datastore", "records", "table")
        self.assertNotEqual(config1[key], config2[key])
        self.assertEqual(config2[key], "override_record")



class ButlerPutGetTests(TestCaseMixin):
    """Helper class providing a suite of put/get tests to run against
    different butler configurations.
    """

    root: str
    default_run = "ingésτ😺"
    storageClassFactory: StorageClassFactory
    configFile: str
    tmpConfigFile: str


    @staticmethod
    def addDatasetType(
        datasetTypeName: str, dimensions: DimensionGraph, storageClass: StorageClass | str, registry: Registry
    ) -> DatasetType:
        """Create a DatasetType and register it."""
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(datasetType)
        return datasetType

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(cls.configFile)


    def assertGetComponents(
        self,
        butler: Butler,
        datasetRef: DatasetRef,
        components: tuple[str, ...],
        reference: Any,
        collections: Any = None,
    ) -> None:
        datasetType = datasetRef.datasetType
        dataId = datasetRef.dataId
        deferred = butler.getDeferred(datasetRef)

        for component in components:
            compTypeName = datasetType.componentTypeName(component)
            result = butler.get(compTypeName, dataId, collections=collections)
            self.assertEqual(result, getattr(reference, component))
            result_deferred = deferred.get(component=component)
            self.assertEqual(result_deferred, result)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)


    def create_butler(
        self, run: str, storageClass: StorageClass | str, datasetTypeName: str
    ) -> tuple[Butler, DatasetType]:
        butler = Butler(self.tmpConfigFile, run=run)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {run})

        # Create and register a DatasetType.
        dimensions = butler.dimensions.extract(["instrument", "visit"])

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions.
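        # Records must be inserted parents-first: the visit record below
        # references the instrument, physical_filter and visit_system
        # records created here.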

        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit_system", {"instrument": "DummyCamComp", "id": 1, "name": "default"}
        )
        visit_start = astropy.time.Time("2020-01-01 08:00:00.123456789", scale="tai")
        visit_end = astropy.time.Time("2020-01-01 08:00:36.66", scale="tai")
        butler.registry.insertDimensionData(
            "visit",
            {
                "instrument": "DummyCamComp",
                "id": 423,
                "name": "fourtwentythree",
                "physical_filter": "d-r",
                "visit_system": 1,
                "datetime_begin": visit_start,
                "datetime_end": visit_end,
            },
        )

        # Add more visits for some later tests.
        for visit_id in (424, 425):
            butler.registry.insertDimensionData(
                "visit",
                {
                    "instrument": "DummyCamComp",
                    "id": visit_id,
                    "name": f"fourtwentyfour_{visit_id}",
                    "physical_filter": "d-r",
                },
            )
        return butler, datasetType


    def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Butler:
        # New datasets will be added to run and tag, but we will only look in
        # tag when looking up datasets.
        run = self.default_run
        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)
        assert butler.run is not None

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = butler.registry.expandDataId({"instrument": "DummyCamComp", "visit": 423})

        # Put and remove the dataset once as a DatasetRef, once as a dataId,
        # and once with a DatasetType.

        # Keep track of any collections we add and do not clean up.
        expected_collections = {run}

        counter = 0
        ref = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run="put_run_1")
        args: tuple[DatasetRef] | tuple[str | DatasetType, DataCoordinate]

        for args in ((ref,), (datasetTypeName, dataId), (datasetType, dataId)):
            # Since we are using subTest we can get cascading failures
            # here, with the first attempt failing and the others failing
            # immediately because the dataset already exists. Work around
            # this by using a distinct run collection each time.
            counter += 1
            this_run = f"put_run_{counter}"
            butler.registry.registerCollection(this_run, type=CollectionType.RUN)
            expected_collections.update({this_run})

            with self.subTest(args=args):
                kwargs: dict[str, Any] = {}
                if not isinstance(args[0], DatasetRef):  # type: ignore
                    kwargs["run"] = this_run
                ref = butler.put(metric, *args, **kwargs)
                self.assertIsInstance(ref, DatasetRef)

                # Test get with a resolved DatasetRef (formerly getDirect).
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get with the dataset type name and dataId.
                metricOut = butler.get(ref.datasetType.name, dataId, collections=this_run)
                self.assertEqual(metric, metricOut)
                # Test get with a DatasetRef again.
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test getDeferred with dataId.
                metricOut = butler.getDeferred(ref.datasetType.name, dataId, collections=this_run).get()
                self.assertEqual(metric, metricOut)
                # Test getDeferred with a ref.
                metricOut = butler.getDeferred(ref).get()
                self.assertEqual(metric, metricOut)

                # Check we can get components.
                if storageClass.isComposite():
                    self.assertGetComponents(
                        butler, ref, ("summary", "data", "output"), metric, collections=this_run
                    )

                # Can the artifacts themselves be retrieved?
                if not butler._datastore.isEphemeral:
                    root_uri = ResourcePath(self.root)

                    for preserve_path in (True, False):
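                        # With preserve_path=True the datastore-relative
                        # directory hierarchy is recreated under the
                        # destination (hence the "/" separators asserted
                        # below); with False the artifacts land flat in
                        # the destination directory.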

                        destination = root_uri.join(f"artifacts/{preserve_path}_{counter}/")
                        # Use copy so that we can test that overwrite
                        # protection works (using "auto" for file URIs would
                        # use hard links, and a subsequent transfer would
                        # succeed because it knows they are the same file).
                        transferred = butler.retrieveArtifacts(
                            [ref], destination, preserve_path=preserve_path, transfer="copy"
                        )
                        self.assertGreater(len(transferred), 0)
                        artifacts = list(ResourcePath.findFileResources([destination]))
                        self.assertEqual(set(transferred), set(artifacts))

                        for artifact in transferred:
                            path_in_destination = artifact.relative_to(destination)
                            self.assertIsNotNone(path_in_destination)
                            assert path_in_destination is not None

                            # When the path is not preserved there should not
                            # be any path separators.
                            num_seps = path_in_destination.count("/")
                            if preserve_path:
                                self.assertGreater(num_seps, 0)
                            else:
                                self.assertEqual(num_seps, 0)

                        primary_uri, secondary_uris = butler.getURIs(ref)
                        n_uris = len(secondary_uris)
                        if primary_uri:
                            n_uris += 1
                        self.assertEqual(
                            len(artifacts),
                            n_uris,
                            "Comparing expected artifacts vs actual:"
                            f" {artifacts} vs {primary_uri} and {secondary_uris}",
                        )

                        if preserve_path:
                            # No need to run these twice.
                            with self.assertRaises(ValueError):
                                butler.retrieveArtifacts([ref], destination, transfer="move")

                            with self.assertRaises(FileExistsError):
                                butler.retrieveArtifacts([ref], destination)

                            transferred_again = butler.retrieveArtifacts(
                                [ref], destination, preserve_path=preserve_path, overwrite=True
                            )
                            self.assertEqual(set(transferred_again), set(transferred))


                # Now remove the dataset completely.
                butler.pruneDatasets([ref], purge=True, unstore=True)
                # Lookup with the original args should now fail.
                kwargs = {"collections": this_run}
                if isinstance(args[0], DatasetRef):
                    kwargs = {}  # Prevent a warning from being issued.
                self.assertFalse(butler.exists(*args, **kwargs))
                # get() should now fail as well.
                with self.assertRaises(FileNotFoundError):
                    butler.get(ref)
                # Registry shouldn't be able to find it by dataset_id anymore.
                self.assertIsNone(butler.registry.getDataset(ref.id))

                # Do explicit registry removal since we know the run
                # collection is now empty.
                butler.registry.removeCollection(this_run)
                expected_collections.remove(this_run)

        # Create a DatasetRef for a put using the default run.
        refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

        # Check that getDeferred fails with a standalone ref.
        with self.assertRaises(LookupError):
            butler.getDeferred(refIn)

        # Put the dataset again, since the last thing we did was remove it,
        # and this time use the default collection.
        ref = butler.put(metric, refIn)

        # Get with parameters.
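        # The "slice" read parameter truncates the data list on get
        # (presumably via the storage class delegate) without modifying
        # the stored dataset.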

        stop = 4
        sliced = butler.get(ref, parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        assert metric.data is not None  # for mypy
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with parameters.
        sliced = butler.getDeferred(ref, parameters={"slice": slice(stop)}).get()
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)
        # getDeferred with deferred parameters.
        sliced = butler.getDeferred(ref).get(parameters={"slice": slice(stop)})
        self.assertNotEqual(metric, sliced)
        self.assertEqual(metric.summary, sliced.summary)
        self.assertEqual(metric.output, sliced.output)
        self.assertEqual(metric.data[:stop], sliced.data)

        if storageClass.isComposite():
            # Check that components can be retrieved.
            metricOut = butler.get(ref.datasetType.name, dataId)
            compNameS = ref.datasetType.componentTypeName("summary")
            compNameD = ref.datasetType.componentTypeName("data")
            summary = butler.get(compNameS, dataId)
            self.assertEqual(summary, metric.summary)
            data = butler.get(compNameD, dataId)
            self.assertEqual(data, metric.data)

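            # "counter" is a derived component: it is computed on read
            # from the stored composite (here the length of the data
            # list) rather than being stored itself.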

            if "counter" in storageClass.derivedComponents:
                count = butler.get(ref.datasetType.componentTypeName("counter"), dataId)
                self.assertEqual(count, len(data))

                count = butler.get(
                    ref.datasetType.componentTypeName("counter"), dataId, parameters={"slice": slice(stop)}
                )
                self.assertEqual(count, stop)

            compRef = butler.registry.findDataset(compNameS, dataId, collections=butler.collections)
            assert compRef is not None
            summary = butler.get(compRef)
            self.assertEqual(summary, metric.summary)


        # Create a Dataset type that has the same name but is inconsistent.
        inconsistentDatasetType = DatasetType(
            datasetTypeName, datasetType.dimensions, self.storageClassFactory.getStorageClass("Config")
        )

        # Getting with a dataset type that does not match registry fails.
        with self.assertRaisesRegex(ValueError, "Supplied dataset type .* inconsistent with registry"):
            butler.get(inconsistentDatasetType, dataId)

        # Combining a DatasetRef with a dataId should fail.
        with self.assertRaisesRegex(ValueError, "DatasetRef given, cannot use dataId as well"):
            butler.get(ref, dataId)
        # Getting with an explicit ref should fail if the id doesn't match.
        with self.assertRaises(FileNotFoundError):
            butler.get(DatasetRef(ref.datasetType, ref.dataId, id=uuid.UUID(int=101), run=butler.run))

        # Getting a dataset with unknown parameters should fail.
        with self.assertRaisesRegex(KeyError, "Parameter 'unsupported' not understood"):
            butler.get(ref, parameters={"unsupported": True})

        # Check we have the expected collections.
        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, expected_collections)

        # Clean up to check that we can remove something that may have
        # already had a component removed.
        butler.pruneDatasets([ref], unstore=True, purge=True)


        # Add the same ref again, so we can check that a duplicate put fails.
        ref = butler.put(metric, datasetType, dataId)

        # Repeat put will fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # Put will still fail.
        with self.assertRaisesRegex(
            ConflictingDefinitionError, "A database constraint failure was triggered"
        ):
            butler.put(metric, datasetType, dataId)

        # Repeat the same sequence with a resolved ref.
        butler.pruneDatasets([ref], unstore=True, purge=True)
        ref = butler.put(metric, refIn)

        # Repeat put will fail.
        with self.assertRaisesRegex(ConflictingDefinitionError, "Datastore already contains dataset"):
            butler.put(metric, refIn)

        # Remove the datastore entry.
        butler.pruneDatasets([ref], unstore=True, purge=False, disassociate=False)

        # In the resolved-ref case this second write will succeed.
        ref = butler.put(metric, refIn)

        # Leave the dataset in place since some downstream tests require
        # something to be present.

        return butler


    def testDeferredCollectionPassing(self) -> None:
        # Construct a butler with no run or collection, but make it writeable.
        butler = Butler(self.tmpConfigFile, writeable=True)
        # Create and register a DatasetType.
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        datasetType = self.addDatasetType(
            "example", dimensions, self.storageClassFactory.getStorageClass("StructuredData"), butler.registry
        )
        # Add needed Dimensions.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
        )
        dataId = {"instrument": "DummyCamComp", "visit": 423}
        # Create dataset.
        metric = makeExampleMetrics()
        # Register a new run and put dataset.
        run = "deferred"
        self.assertTrue(butler.registry.registerRun(run))
        # A second registration is allowed but indicates a no-op.
        self.assertFalse(butler.registry.registerRun(run))
        ref = butler.put(metric, datasetType, dataId, run=run)
        # Putting with no run should raise CollectionError.
        with self.assertRaises(CollectionError):
            butler.put(metric, datasetType, dataId)
        # Dataset should exist.
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))
        # We should be able to get the dataset back, both with and without
        # a deferred dataset handle.
        self.assertEqual(metric, butler.get(datasetType, dataId, collections=[run]))
        self.assertEqual(metric, butler.getDeferred(datasetType, dataId, collections=[run]).get())
        # Trying to find the dataset without any collection should fail:
        # exists() returns False and get() raises CollectionError.
        self.assertFalse(butler.exists(datasetType, dataId))
        with self.assertRaises(CollectionError):
            butler.get(datasetType, dataId)
        # Associate the dataset with a different collection.
        butler.registry.registerCollection("tagged")
        butler.registry.associate("tagged", [ref])
        # Removing the dataset from the new tagged collection should leave
        # it findable in the original run collection.
        butler.pruneDatasets([ref], tags=["tagged"])
        self.assertTrue(butler.exists(datasetType, dataId, collections=[run]))




class ButlerTests(ButlerPutGetTests):
    """Tests for Butler."""

    useTempRoot = True
    validationCanFail: bool
    fullConfigKey: str | None
    registryStr: str | None
    datastoreName: list[str] | None
    datastoreStr: list[str]

    def setUp(self) -> None:
        """Create a new butler root for each test."""
        self.root = makeTestTempDir(TESTDIR)
        Butler.makeRepo(self.root, config=Config(self.configFile))
        self.tmpConfigFile = os.path.join(self.root, "butler.yaml")


    def testConstructor(self) -> None:
        """Independent test of constructor."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        self.assertIsInstance(butler, Butler)

        # Check that butler.yaml is added automatically.
        if self.tmpConfigFile.endswith(end := "/butler.yaml"):
            config_dir = self.tmpConfigFile[: -len(end)]
            butler = Butler(config_dir, run=self.default_run)
            self.assertIsInstance(butler, Butler)

            # Even with a ResourcePath.
            butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run)
            self.assertIsInstance(butler, Butler)

        collections = set(butler.registry.queryCollections())
        self.assertEqual(collections, {self.default_run})

        # Check that some special characters can be included in the run name.
        special_run = "u@b.c-A"
        butler_special = Butler(butler=butler, run=special_run)
        collections = set(butler_special.registry.queryCollections("*@*"))
        self.assertEqual(collections, {special_run})

        butler2 = Butler(butler=butler, collections=["other"])
        self.assertEqual(butler2.collections, ("other",))
        self.assertIsNone(butler2.run)
        self.assertIs(butler._datastore, butler2._datastore)

        # Test that we can use an environment variable to find this
        # repository.
        butler_index = Config()
        butler_index["label"] = self.tmpConfigFile
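        # The repository index is a small mapping from label to butler
        # config URI; serialized it looks something like
        #   label: /path/to/repo/butler.yaml
        #   bad_label: file://bucket/not_real.yaml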

        for suffix in (".yaml", ".json"):
            # Ensure that the content differs so that we know that
            # we aren't reusing the cache.
            bad_label = f"file://bucket/not_real{suffix}"
            butler_index["bad_label"] = bad_label
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    self.assertEqual(Butler.get_known_repos(), {"label", "bad_label"})
                    uri = Butler.get_repo_uri("bad_label")
                    self.assertEqual(uri, ResourcePath(bad_label))
                    uri = Butler.get_repo_uri("label")
                    butler = Butler(uri, writeable=False)
                    self.assertIsInstance(butler, Butler)
                    butler = Butler("label", writeable=False)
                    self.assertIsInstance(butler, Butler)
                    with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
                        Butler("not_there", writeable=False)
                    with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"):
                        Butler("bad_label")
                    with self.assertRaises(FileNotFoundError):
                        # Should ignore aliases.
                        Butler(ResourcePath("label", forceAbsolute=False))
                    with self.assertRaises(KeyError) as cm:
                        Butler.get_repo_uri("missing")
                    self.assertEqual(
                        Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False)
                    )
                    self.assertIn("not known to", str(cm.exception))
                    # Should report no failure.
                    self.assertEqual(ButlerRepoIndex.get_failure_reason(), "")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with an empty configuration.
                butler_index = Config()
                butler_index.dumpToUri(temp_file)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"):
                        Butler("label")
            with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
                # Now with bad contents.
                with open(temp_file.ospath, "w") as fh:
                    print("'", file=fh)
                with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
                    with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"):
                        Butler("label")
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
            with self.assertRaises(FileNotFoundError):
                Butler.get_repo_uri("label")
            self.assertEqual(Butler.get_known_repos(), set())

            with self.assertRaisesRegex(FileNotFoundError, "index file not found"):
                Butler("label")

        # Check that we can create a Butler when the alias file is not found.
        butler = Butler(self.tmpConfigFile, writeable=False)
        self.assertIsInstance(butler, Butler)
        with self.assertRaises(KeyError) as cm:
            # No environment variable set.
            Butler.get_repo_uri("label")
        self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
        self.assertIn("No repository index defined", str(cm.exception))
        with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"):
            # No aliases registered.
            Butler("not_there")
        self.assertEqual(Butler.get_known_repos(), set())


    def testBasicPutGet(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runPutGetTest(storageClass, "test_metric")


    def testCompositePutGetConcrete(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadCompNoDisassembly")
        butler = self.runPutGetTest(storageClass, "test_metric")

        # Should *not* be disassembled.
        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertFalse(components)
        self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
        self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")

        # Predicted dataset.
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)
        self.assertFalse(components)
        self.assertIsInstance(uri, ResourcePath)
        self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
        self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")


    def testCompositePutGetVirtual(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredCompositeReadComp")
        butler = self.runPutGetTest(storageClass, "test_metric_comp")

        # Should be disassembled.
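        # (A disassembled composite is stored as one artifact per
        # component, so getURIs returns no primary URI and one URI per
        # component.)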

        datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
        self.assertEqual(len(datasets), 1)
        uri, components = butler.getURIs(datasets[0])

        if butler._datastore.isEphemeral:
            # Never disassemble with an in-memory datastore.
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertEqual(uri.fragment, "", f"Checking absence of fragment in {uri}")
            self.assertIn("423", str(uri), f"Checking visit is in URI {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("423", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "", f"Checking absence of fragment in {compuri}")

        # Predicted dataset.
        dataId = {"instrument": "DummyCamComp", "visit": 424}
        uri, components = butler.getURIs(datasets[0].datasetType, dataId=dataId, predict=True)

        if butler._datastore.isEphemeral:
            # Never disassembled.
            self.assertIsInstance(uri, ResourcePath)
            self.assertFalse(components)
            self.assertIn("424", str(uri), f"Checking visit is in URI {uri}")
            self.assertEqual(uri.fragment, "predicted", f"Checking for fragment in {uri}")
        else:
            self.assertIsNone(uri)
            self.assertEqual(set(components), set(storageClass.components))
            for compuri in components.values():
                self.assertIsInstance(compuri, ResourcePath)
                self.assertIn("424", str(compuri), f"Checking visit is in URI {compuri}")
                self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")


    def testStorageClassOverrideGet(self) -> None:
        """Test storage class conversion on get with an override."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        datasetTypeName = "anything"
        run = self.default_run

        butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

        # Create and store a dataset.
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCamComp", "visit": 423}

        ref = butler.put(metric, datasetType, dataId)

        # Return the native type.
        retrieved = butler.get(ref)
        self.assertEqual(retrieved, metric)

        # Specify an override.
        new_sc = self.storageClassFactory.getStorageClass("MetricsConversion")
        model = butler.get(ref, storageClass=new_sc)
        self.assertNotEqual(type(model), type(retrieved))
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override later.
        deferred = butler.getDeferred(ref)
        model = deferred.get(storageClass=new_sc)
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Defer but override up front.
        deferred = butler.getDeferred(ref, storageClass=new_sc)
        model = deferred.get()
        self.assertIs(type(model), new_sc.pytype)
        self.assertEqual(retrieved, model)

        # Retrieve a component. Should be a tuple.
        data = butler.get("anything.data", dataId, storageClass="StructuredDataDataTestTuple")
        self.assertIs(type(data), tuple)
        self.assertEqual(data, tuple(retrieved.data))

        # A parameter defined by the write storage class should work
        # regardless of the read storage class.
        data = butler.get(
            "anything.data",
            dataId,
            storageClass="StructuredDataDataTestTuple",
            parameters={"slice": slice(2, 4)},
        )
        self.assertEqual(len(data), 2)

        # Try a parameter that is known to the read storage class but not
        # the write storage class.
        with self.assertRaises(KeyError):
            butler.get(
                "anything.data",
                dataId,
                storageClass="StructuredDataDataTestTuple",
                parameters={"xslice": slice(2, 4)},
            )


    def testPytypePutCoercion(self) -> None:
        """Test python type coercion on Butler.get and put."""
        # Store some data with the normal example storage class.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        datasetTypeName = "test_metric"
        butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

        dataId = {"instrument": "DummyCamComp", "visit": 423}

        # Put a dict; this should coerce to a MetricsExample.
        test_dict = {"summary": {"a": 1}, "output": {"b": 2}}
        metric_ref = butler.put(test_dict, datasetTypeName, dataId=dataId, visit=424)
        test_metric = butler.get(metric_ref)
        self.assertEqual(get_full_type_name(test_metric), "lsst.daf.butler.tests.MetricsExample")
        self.assertEqual(test_metric.summary, test_dict["summary"])
        self.assertEqual(test_metric.output, test_dict["output"])

        # Check that the put still works if a DatasetType is given with
        # a definition matching this python type.
        registry_type = butler.registry.getDatasetType(datasetTypeName)
        this_type = DatasetType(datasetTypeName, registry_type.dimensions, "StructuredDataDictJson")
        metric2_ref = butler.put(test_dict, this_type, dataId=dataId, visit=425)
        self.assertEqual(metric2_ref.datasetType, registry_type)

        # The get will return the type expected by registry.
        test_metric2 = butler.get(metric2_ref)
        self.assertEqual(get_full_type_name(test_metric2), "lsst.daf.butler.tests.MetricsExample")

        # Make a new DatasetRef with the compatible but different DatasetType.
        # This should now return a dict.
        new_ref = DatasetRef(this_type, metric2_ref.dataId, id=metric2_ref.id, run=metric2_ref.run)
        test_dict2 = butler.get(new_ref)
        self.assertEqual(get_full_type_name(test_dict2), "dict")

        # Get it again with the wrong dataset type definition, this time
        # using the DatasetType and data ID rather than a ref. This should
        # be consistent with the ref-based get() behavior above and return
        # the python type of the supplied DatasetType.
        test_dict3 = butler.get(this_type, dataId=dataId, visit=425)
        self.assertEqual(get_full_type_name(test_dict3), "dict")


    def testIngest(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Create and register a DatasetType.
        dimensions = butler.dimensions.extract(["instrument", "visit", "detector"])

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDictYaml")
        datasetTypeName = "metric"

        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

        # Add needed Dimensions.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        for detector in (1, 2):
            butler.registry.insertDimensionData(
                "detector", {"instrument": "DummyCamComp", "id": detector, "full_name": f"detector{detector}"}
            )

        butler.registry.insertDimensionData(
            "visit",
            {"instrument": "DummyCamComp", "id": 423, "name": "fourtwentythree", "physical_filter": "d-r"},
            {"instrument": "DummyCamComp", "id": 424, "name": "fourtwentyfour", "physical_filter": "d-r"},
        )

        formatter = doImportType("lsst.daf.butler.formatters.yaml.YamlFormatter")
        dataRoot = os.path.join(TESTDIR, "data", "basic")
        datasets = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            metricFile = os.path.join(dataRoot, f"{detector_name}.yaml")
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 423, "detector": detector}
            )
            # Create a DatasetRef for ingest.
            refIn = DatasetRef(datasetType, dataId, run=self.default_run)

            datasets.append(FileDataset(path=metricFile, refs=[refIn], formatter=formatter))

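        # Each FileDataset ties one file to one or more DatasetRefs; here
        # each file carries a single ref, while the multi-detector ingest
        # below attaches two refs to a single file.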

        butler.ingest(*datasets, transfer="copy")

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 423}

        metrics1 = butler.get(datasetTypeName, dataId1)
        metrics2 = butler.get(datasetTypeName, dataId2)
        self.assertNotEqual(metrics1, metrics2)

        # Compare URIs.
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertNotEqual(uri1, uri2)

        # Now do a multi-dataset but single-file ingest.
        metricFile = os.path.join(dataRoot, "detectors.yaml")
        refs = []
        for detector in (1, 2):
            detector_name = f"detector_{detector}"
            dataId = butler.registry.expandDataId(
                {"instrument": "DummyCamComp", "visit": 424, "detector": detector}
            )
            # Create a DatasetRef for ingest.
            refs.append(DatasetRef(datasetType, dataId, run=self.default_run))

        # Test "move" transfer to ensure that the files themselves
        # have disappeared following ingest.
        with ResourcePath.temporary_uri(suffix=".yaml") as tempFile:
            tempFile.transfer_from(ResourcePath(metricFile), transfer="copy")

            datasets = []
            datasets.append(FileDataset(path=tempFile, refs=refs, formatter=MultiDetectorFormatter))

            # For the first ingest use copy.
            butler.ingest(*datasets, transfer="copy", record_validation_info=False)

            # Now try to ingest again in "execution butler" mode where
            # the registry entries exist but the datastore does not have
            # the files. We also need to strip the dimension records to ensure
            # that they will be re-added by the ingest.
            ref = datasets[0].refs[0]
            datasets[0].refs = [
                cast(
                    DatasetRef,
                    butler.registry.findDataset(ref.datasetType, dataId=ref.dataId, collections=ref.run),
                )
                for ref in datasets[0].refs
            ]
            all_refs = []
            for dataset in datasets:
                refs = []
                for ref in dataset.refs:
                    # Create a dict from the dataId to drop the records.
                    new_data_id = {str(k): v for k, v in ref.dataId.items()}
                    new_ref = butler.registry.findDataset(ref.datasetType, new_data_id, collections=ref.run)
                    assert new_ref is not None
                    self.assertFalse(new_ref.dataId.hasRecords())
                    refs.append(new_ref)
                dataset.refs = refs
                all_refs.extend(dataset.refs)
            butler.pruneDatasets(all_refs, disassociate=False, unstore=True, purge=False)

            # Use move mode to test that the file is deleted. Also
            # disable recording of file size.
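            # (With transfer="move" the datastore takes ownership of the
            # file, so tempFile should vanish; record_validation_info=False
            # skips size/checksum capture, checked below as a file_size of
            # -1 where the datastore records such info.)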

            butler.ingest(*datasets, transfer="move", record_validation_info=False)

            # Check that every ref now has records.
            for dataset in datasets:
                for ref in dataset.refs:
                    self.assertTrue(ref.dataId.hasRecords())

            # Ensure that the file has disappeared.
            self.assertFalse(tempFile.exists())

        # Check that the datastore recorded no file size.
        # Not all datastores can support this.
        try:
            infos = butler._datastore.getStoredItemsInfo(datasets[0].refs[0])  # type: ignore[attr-defined]
            self.assertEqual(infos[0].file_size, -1)
        except AttributeError:
            pass

        dataId1 = {"instrument": "DummyCamComp", "detector": 1, "visit": 424}
        dataId2 = {"instrument": "DummyCamComp", "detector": 2, "visit": 424}

        multi1 = butler.get(datasetTypeName, dataId1)
        multi2 = butler.get(datasetTypeName, dataId2)

        self.assertEqual(multi1, metrics1)
        self.assertEqual(multi2, metrics2)

        # Compare URIs.
        uri1 = butler.getURI(datasetTypeName, dataId1)
        uri2 = butler.getURI(datasetTypeName, dataId2)
        self.assertEqual(uri1, uri2, f"Cf. {uri1} with {uri2}")

        # Test that removing one does not break the second.
        # This line will issue a warning log message for a ChainedDatastore
        # that uses an InMemoryDatastore, since in-memory cannot ingest
        # files.
        butler.pruneDatasets([datasets[0].refs[0]], unstore=True, disassociate=False)
        self.assertFalse(butler.exists(datasetTypeName, dataId1))
        self.assertTrue(butler.exists(datasetTypeName, dataId2))
        multi2b = butler.get(datasetTypeName, dataId2)
        self.assertEqual(multi2, multi2b)

        # Ensure we can ingest 0 datasets.
        datasets = []
        butler.ingest(*datasets)


    def testPickle(self) -> None:
        """Test pickle support."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerOut = pickle.loads(pickle.dumps(butler))
        self.assertIsInstance(butlerOut, Butler)
        self.assertEqual(butlerOut._config, butler._config)
        self.assertEqual(butlerOut.collections, butler.collections)
        self.assertEqual(butlerOut.run, butler.run)


    def testGetDatasetTypes(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        dimensions = butler.dimensions.extract(["instrument", "visit", "physical_filter"])
        dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [
            (
                "instrument",
                [
                    {"instrument": "DummyCam"},
                    {"instrument": "DummyHSC"},
                    {"instrument": "DummyCamComp"},
                ],
            ),
            ("physical_filter", [{"instrument": "DummyCam", "name": "d-r", "band": "R"}]),
            ("visit", [{"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}]),
        ]
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        # Add needed Dimensions.
        for element, data in dimensionEntries:
            butler.registry.insertDimensionData(element, *data)

        # When a DatasetType is added to the registry, entries are not
        # created for its components, but component dataset types can
        # still be returned by queries.
        datasetTypeNames = {"metric", "metric2", "metric4", "metric33", "pvi", "paramtest"}
        components = set()
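        # Component dataset types are named "<parent>.<component>", e.g.
        # "metric.summary", as constructed by DatasetType.nameWithComponent
        # below.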

        for datasetTypeName in datasetTypeNames:
            # Create and register a DatasetType.
            self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)

            for componentName in storageClass.components:
                components.add(DatasetType.nameWithComponent(datasetTypeName, componentName))

        fromRegistry: set[DatasetType] = set()
        for parent_dataset_type in butler.registry.queryDatasetTypes():
            fromRegistry.add(parent_dataset_type)
            fromRegistry.update(parent_dataset_type.makeAllComponentDatasetTypes())
        self.assertEqual({d.name for d in fromRegistry}, datasetTypeNames | components)

        # Now that we have some dataset types registered, validate them.
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )

        # Add a new datasetType that will fail template validation.
        self.addDatasetType("test_metric_comp", dimensions, storageClass, butler.registry)
        if self.validationCanFail:
            with self.assertRaises(ValidationError):
                butler.validateConfiguration()

        # Rerun validation but with a subset of dataset type names.
        butler.validateConfiguration(datasetTypeNames=["metric4"])

        # Rerun validation but ignore the bad datasetType.
        butler.validateConfiguration(
            ignore=[
                "test_metric_comp",
                "metric3",
                "metric5",
                "calexp",
                "DummySC",
                "datasetType.component",
                "random_data",
                "random_data_2",
            ]
        )


    def testTransaction(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        datasetTypeName = "test_metric"
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = (
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        )
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        metric = makeExampleMetrics()
        dataId = {"instrument": "DummyCam", "visit": 42}
        # Create and register a DatasetType.
        datasetType = self.addDatasetType(datasetTypeName, dimensions, storageClass, butler.registry)
        with self.assertRaises(TransactionTestError):
            with butler.transaction():
                # Add needed Dimensions.
                for args in dimensionEntries:
                    butler.registry.insertDimensionData(*args)
                # Store a dataset.
                ref = butler.put(metric, datasetTypeName, dataId)
                self.assertIsInstance(ref, DatasetRef)
                # Test get with a resolved DatasetRef (formerly getDirect).
                metricOut = butler.get(ref)
                self.assertEqual(metric, metricOut)
                # Test get with dataset type name and dataId.
                metricOut = butler.get(datasetTypeName, dataId)
                self.assertEqual(metric, metricOut)
                # Check we can get components.
                self.assertGetComponents(butler, ref, ("summary", "data", "output"), metric)
                raise TransactionTestError("This should roll back the entire transaction")
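        # Everything done inside the transaction above should now have been
        # rolled back: the dimension records, the dataset registry entry
        # and the datastore artifact.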

        with self.assertRaises(DataIdValueError, msg=f"Check can't expand DataId {dataId}"):
            butler.registry.expandDataId(dataId)
        # Should raise LookupError for the missing data ID value.
        with self.assertRaises(LookupError, msg=f"Check can't get by {datasetTypeName} and {dataId}"):
            butler.get(datasetTypeName, dataId)
        # Also check explicitly that the Dataset entry is missing.
        self.assertIsNone(butler.registry.findDataset(datasetType, dataId, collections=butler.collections))
        # Direct retrieval should not find the file in the Datastore.
        with self.assertRaises(FileNotFoundError, msg=f"Check {ref} can't be retrieved directly"):
            butler.get(ref)


    def testMakeRepo(self) -> None:
        """Test that we can write butler configuration to a new repository via
        the Butler.makeRepo interface and then instantiate a butler from the
        repo root.
        """
        # Do not run the test if we know this datastore configuration does
        # not support a file system root.
        if self.fullConfigKey is None:
            return

        # Create two separate directories.
        root1 = tempfile.mkdtemp(dir=self.root)
        root2 = tempfile.mkdtemp(dir=self.root)

        butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile))
        limited = Config(self.configFile)
        butler1 = Butler(butlerConfig)
        butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile))
        full = Config(self.tmpConfigFile)
        butler2 = Butler(butlerConfig)
        # Butlers should have the same configuration regardless of whether
        # defaults were expanded.
        self.assertEqual(butler1._config, butler2._config)
        # Config files loaded directly should not be the same.
        self.assertNotEqual(limited, full)
        # Make sure "limited" doesn't have a few keys we know it should be
        # inheriting from defaults.
        self.assertIn(self.fullConfigKey, full)
        self.assertNotIn(self.fullConfigKey, limited)

        # Collections don't appear until something is put in them.
        collections1 = set(butler1.registry.queryCollections())
        self.assertEqual(collections1, set())
        self.assertEqual(set(butler2.registry.queryCollections()), collections1)

        # Check that a config with no associated file name will not
        # work properly with a relocatable Butler repo.
        butlerConfig.configFile = None
        with self.assertRaises(ValueError):
            Butler(butlerConfig)

        with self.assertRaises(FileExistsError):
            Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)


    def testStringification(self) -> None:
        butler = Butler(self.tmpConfigFile, run=self.default_run)
        butlerStr = str(butler)

        if self.datastoreStr is not None:
            for testStr in self.datastoreStr:
                self.assertIn(testStr, butlerStr)
        if self.registryStr is not None:
            self.assertIn(self.registryStr, butlerStr)

        datastoreName = butler._datastore.name
        if self.datastoreName is not None:
            for testStr in self.datastoreName:
                self.assertIn(testStr, datastoreName)


    def testButlerRewriteDataId(self) -> None:
        """Test that dataIds can be rewritten based on dimension records."""
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        datasetTypeName = "random_data"

        # Create dimension records.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"}
        )

        dimensions = butler.dimensions.extract(["instrument", "exposure"])
        datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        butler.registry.registerDatasetType(datasetType)

        n_exposures = 5
        dayobs = 20210530

        for i in range(n_exposures):
            butler.registry.insertDimensionData(
                "exposure",
                {
                    "instrument": "DummyCamComp",
                    "id": i,
                    "obs_id": f"exp{i}",
                    "seq_num": i,
                    "day_obs": dayobs,
                    "physical_filter": "d-r",
                },
            )

        # Write some data.
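        # The data IDs below identify an exposure by (seq_num, day_obs)
        # rather than by its primary id; the butler is expected to rewrite
        # them to the canonical exposure value using the dimension records
        # inserted above.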

        for i in range(n_exposures):
            metric = {"something": i, "other": "metric", "list": [2 * x for x in range(i)]}

            # Use the seq_num for the put to test rewriting.
            dataId = {"seq_num": i, "day_obs": dayobs, "instrument": "DummyCamComp", "physical_filter": "d-r"}
            ref = butler.put(metric, datasetTypeName, dataId=dataId)

            # Check that the exposure is correct in the dataId.
            self.assertEqual(ref.dataId["exposure"], i)

            # And check that we can get the dataset back with the same dataId.
            new_metric = butler.get(datasetTypeName, dataId=dataId)
            self.assertEqual(new_metric, metric)



class FileDatastoreButlerTests(ButlerTests):
    """Common tests and specialization of ButlerTests for butlers backed
    by datastores that inherit from FileDatastore.
    """

    def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) -> bool:
        """Check if a file exists at a given path (relative to root).

        Test testPutTemplates verifies actual physical existence of the files
        in the requested location.
        """
        uri = ResourcePath(root, forceDirectory=True)
        return uri.join(relpath).exists()


    def testPutTemplates(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        butler = Butler(self.tmpConfigFile, run=self.default_run)

        # Add needed Dimensions.
        butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
        butler.registry.insertDimensionData(
            "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 423, "name": "v423", "physical_filter": "d-r"}
        )
        butler.registry.insertDimensionData(
            "visit", {"instrument": "DummyCamComp", "id": 425, "name": "v425", "physical_filter": "d-r"}
        )

        # Create and store a dataset.
        metric = makeExampleMetrics()

        # Create two almost-identical DatasetTypes (both will use default
        # template).
        dimensions = butler.dimensions.extract(["instrument", "visit"])
        butler.registry.registerDatasetType(DatasetType("metric1", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric2", dimensions, storageClass))
        butler.registry.registerDatasetType(DatasetType("metric3", dimensions, storageClass))

        dataId1 = {"instrument": "DummyCamComp", "visit": 423}
        dataId2 = {"instrument": "DummyCamComp", "visit": 423, "physical_filter": "d-r"}

        # Put with exactly the data ID keys needed.
        ref = butler.put(metric, "metric1", dataId1)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle")
        )

        # Check the template based on dimensions.
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Put with extra data ID keys (physical_filter is an optional
        # dependency); should not change template (at least the way we're
        # defining them to behave now; the important thing is that they
        # must be consistent).
        ref = butler.put(metric, "metric2", dataId2)
        uri = butler.getURI(ref)
        self.assertTrue(uri.exists())
        self.assertTrue(
            uri.unquoted_path.endswith(f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle")
        )

        # Check the template based on dimensions.
        if hasattr(butler._datastore, "templates"):
            butler._datastore.templates.validateTemplates([ref])

        # Use a template that has a typo in dimension record metadata.
        # Easier to test with a butler that has a ref with records attached.
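        # A "{field:?}" placeholder is optional: when the record attribute
        # is missing, the component is dropped from the formatted path (with
        # an INFO log message), whereas the required "{field}" form raises
        # KeyError, as both cases below check.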

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex:?}.fits")
        with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"):
            path = template.format(ref)
        self.assertEqual(path, f"a/v423/{ref.id}_fits")

        template = FileTemplate("a/{visit.name}/{id}_{visit.namex}.fits")
        with self.assertRaises(KeyError):
            with self.assertLogs("lsst.daf.butler.datastore.file_templates", "INFO"):
                template.format(ref)

        # Now use a file template that will not result in unique filenames.
        with self.assertRaises(FileTemplateValidationError):
            butler.put(metric, "metric3", dataId1)


    def testImportExport(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
        self.runImportExportTest(storageClass)

    @unittest.expectedFailure
    def testImportExportVirtualComposite(self) -> None:
        # Run put/get tests just to create and populate a repo.
        storageClass = self.storageClassFactory.getStorageClass("StructuredComposite")
        self.runImportExportTest(storageClass)


1328 def runImportExportTest(self, storageClass: StorageClass) -> None: 

1329 """Test exporting and importing. 

1330 

1331 This test does an export to a temp directory and an import back 

1332 into a new temp directory repo. It does not assume a posix datastore. 

1333 """ 

1334 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1335 

1336 # Test that we must have a file extension. 

1337 with self.assertRaises(ValueError): 

1338 with exportButler.export(filename="dump", directory=".") as export: 

1339 pass 

1340 

1341 # Test that unknown format is not allowed. 

1342 with self.assertRaises(ValueError): 

1343 with exportButler.export(filename="dump.fits", directory=".") as export: 

1344 pass 

1345 

1346 # Test that the repo actually has at least one dataset. 

1347 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1348 self.assertGreater(len(datasets), 0) 

1349 # Add a DimensionRecord that's unused by those datasets. 

1350 skymapRecord = {"name": "example_skymap", "hash": (50).to_bytes(8, byteorder="little")} 

1351 exportButler.registry.insertDimensionData("skymap", skymapRecord) 

1352 # Export and then import datasets. 

1353 with safeTestTempDir(TESTDIR) as exportDir: 

1354 exportFile = os.path.join(exportDir, "exports.yaml") 

1355 with exportButler.export(filename=exportFile, directory=exportDir, transfer="auto") as export: 

1356 export.saveDatasets(datasets) 

1357 # Export the same datasets again. This should quietly do 

1358 # nothing because of internal deduplication, and it shouldn't 

1359 # complain about being asked to export the "htm7" elements even 

1360 # though there aren't any in these datasets or in the database. 

1361 export.saveDatasets(datasets, elements=["htm7"]) 

1362 # Save one of the data IDs again; this should be harmless 

1363 # because of internal deduplication. 

1364 export.saveDataIds([datasets[0].dataId]) 

1365 # Save some dimension records directly. 

1366 export.saveDimensionData("skymap", [skymapRecord]) 

1367 self.assertTrue(os.path.exists(exportFile)) 

1368 with safeTestTempDir(TESTDIR) as importDir: 

1369 # We always want this to be a local posix butler 

1370 Butler.makeRepo(importDir, config=Config(os.path.join(TESTDIR, "config/basic/butler.yaml"))) 

1371 # Calling script.butlerImport tests the implementation of the 

1372 # butler command line interface "import" subcommand. Functions 

1373 # in the script folder are generally considered protected and 

1374 # should not be used as public api. 

1375 with open(exportFile) as f: 

1376 script.butlerImport( 

1377 importDir, 

1378 export_file=f, 

1379 directory=exportDir, 

1380 transfer="auto", 

1381 skip_dimensions=None, 

1382 ) 

1383 importButler = Butler(importDir, run=self.default_run) 

1384 for ref in datasets: 

1385 with self.subTest(ref=ref): 

1386 # Test for existence by passing in the DatasetType and 

1387 # data ID separately, to avoid lookup by dataset_id. 

1388 self.assertTrue(importButler.exists(ref.datasetType, ref.dataId)) 

1389 self.assertEqual( 

1390 list(importButler.registry.queryDimensionRecords("skymap")), 

1391 [importButler.dimensions["skymap"].RecordClass(**skymapRecord)], 

1392 ) 
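# Editor's note: the round trip driven above through script.butlerImport can
# be written directly against the public Butler API. A sketch; the repository
# paths and staging directory are hypothetical.
src = Butler("repo_a", writeable=False)
with src.export(filename="exports.yaml", directory="/tmp/staging", transfer="copy") as export:
    export.saveDatasets(src.registry.queryDatasets(..., collections=...))

dst = Butler("repo_b", writeable=True)
dst.import_(filename="exports.yaml", directory="/tmp/staging", transfer="auto")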

1393 

1394 def testRemoveRuns(self) -> None: 

1395 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1396 butler = Butler(self.tmpConfigFile, writeable=True) 

1397 # Load registry data with dimensions to hang datasets off of. 

1398 registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) 

1399 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1400 # Add some RUN-type collections. 

1401 run1 = "run1" 

1402 butler.registry.registerRun(run1) 

1403 run2 = "run2" 

1404 butler.registry.registerRun(run2) 

1405 # put a dataset in each 

1406 metric = makeExampleMetrics() 

1407 dimensions = butler.dimensions.extract(["instrument", "physical_filter"]) 

1408 datasetType = self.addDatasetType( 

1409 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1410 ) 

1411 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1412 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1413 uri1 = butler.getURI(ref1) 

1414 uri2 = butler.getURI(ref2) 

1415 

1416 with self.assertRaises(OrphanedRecordError): 

1417 butler.registry.removeDatasetType(datasetType.name) 

1418 

1419 # Remove from both runs with different values for unstore. 

1420 butler.removeRuns([run1], unstore=True) 

1421 butler.removeRuns([run2], unstore=False) 

1422 # Should be nothing in registry for either one, and datastore should 

1423 # not think either exists. 

1424 with self.assertRaises(MissingCollectionError): 

1425 butler.registry.getCollectionType(run1) 

1426 with self.assertRaises(MissingCollectionError): 

1427 butler.registry.getCollectionType(run2) 

1428 self.assertFalse(butler.stored(ref1)) 

1429 self.assertFalse(butler.stored(ref2)) 

1430 # The ref we unstored should be gone according to the URI, but the 

1431 # one we forgot should still be around. 

1432 self.assertFalse(uri1.exists()) 

1433 self.assertTrue(uri2.exists()) 

1434 

1435 # Now that the collections have been pruned we can remove the 

1436 # dataset type 

1437 butler.registry.removeDatasetType(datasetType.name) 

1438 

1439 with self.assertLogs("lsst.daf.butler.registries", "INFO") as cm: 

1440 butler.registry.removeDatasetType(("test*", "test*")) 

1441 self.assertIn("not defined", "\n".join(cm.output)) 
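# Editor's note: a compact illustration of the two unstore modes asserted
# above; "some_repo" and the run names are hypothetical. With unstore=True
# the file artifacts are removed along with the registry entries, while
# unstore=False removes the registry entries but leaves the files on disk.
butler = Butler("some_repo", writeable=True)
butler.removeRuns(["scratch_run"], unstore=True)  # registry rows and files gone
butler.removeRuns(["archive_run"], unstore=False)  # registry rows gone, files kept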

1442 

1443 

1444class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1445 """PosixDatastore specialization of a butler""" 

1446 

1447 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1448 fullConfigKey: str | None = ".datastore.formatters" 

1449 validationCanFail = True 

1450 datastoreStr = ["/tmp"] 

1451 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1452 registryStr = "/gen3.sqlite3" 

1453 

1454 def testPathConstructor(self) -> None: 

1455 """Independent test of constructor using PathLike.""" 

1456 butler = Butler(self.tmpConfigFile, run=self.default_run) 

1457 self.assertIsInstance(butler, Butler) 

1458 

1459 # And again with a Path object with the butler yaml 

1460 path = pathlib.Path(self.tmpConfigFile) 

1461 butler = Butler(path, writeable=False) 

1462 self.assertIsInstance(butler, Butler) 

1463 

1464 # And again with a Path object without the butler yaml 

1465 # (making sure we skip it if the tmp config doesn't end 

1466 # in butler.yaml -- which is the case for a subclass) 

1467 if self.tmpConfigFile.endswith("butler.yaml"): 

1468 path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) 

1469 butler = Butler(path, writeable=False) 

1470 self.assertIsInstance(butler, Butler) 

1471 

1472 def testExportTransferCopy(self) -> None: 

1473 """Test local export using all transfer modes""" 

1474 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1475 exportButler = self.runPutGetTest(storageClass, "test_metric") 

1476 # Test that the repo actually has at least one dataset. 

1477 datasets = list(exportButler.registry.queryDatasets(..., collections=...)) 

1478 self.assertGreater(len(datasets), 0) 

1479 uris = [exportButler.getURI(d) for d in datasets] 

1480 assert isinstance(exportButler._datastore, FileDatastore) 

1481 datastoreRoot = exportButler.get_datastore_roots()[exportButler.get_datastore_names()[0]] 

1482 

1483 pathsInStore = [uri.relative_to(datastoreRoot) for uri in uris] 

1484 

1485 for path in pathsInStore: 

1486 # Assume local file system 

1487 assert path is not None 

1488 self.assertTrue(self.checkFileExists(datastoreRoot, path), f"Checking path {path}") 

1489 

1490 for transfer in ("copy", "link", "symlink", "relsymlink"): 

1491 with safeTestTempDir(TESTDIR) as exportDir: 

1492 with exportButler.export(directory=exportDir, format="yaml", transfer=transfer) as export: 

1493 export.saveDatasets(datasets) 

1494 for path in pathsInStore: 

1495 assert path is not None 

1496 self.assertTrue( 

1497 self.checkFileExists(exportDir, path), 

1498 f"Check that mode {transfer} exported files", 

1499 ) 
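# Editor's note: the transfer modes looped over above come from
# lsst.resources: "copy" duplicates the artifact, "link" prefers a hard link,
# and "symlink"/"relsymlink" create absolute or relative symbolic links. One
# such export, with a hypothetical output directory, looks like:
with exportButler.export(directory="/tmp/out", format="yaml", transfer="relsymlink") as export:
    export.saveDatasets(datasets)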

1500 

1501 def testPruneDatasets(self) -> None: 

1502 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1503 butler = Butler(self.tmpConfigFile, writeable=True) 

1504 assert isinstance(butler._datastore, FileDatastore) 

1505 # Load registry data with dimensions to hang datasets off of. 

1506 registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) 

1507 butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) 

1508 # Add some RUN-type collections. 

1509 run1 = "run1" 

1510 butler.registry.registerRun(run1) 

1511 run2 = "run2" 

1512 butler.registry.registerRun(run2) 

1513 # put some datasets. ref1 and ref2 have the same data ID, and are in 

1514 # different runs. ref3 has a different data ID. 

1515 metric = makeExampleMetrics() 

1516 dimensions = butler.dimensions.extract(["instrument", "physical_filter"]) 

1517 datasetType = self.addDatasetType( 

1518 "prune_collections_test_dataset", dimensions, storageClass, butler.registry 

1519 ) 

1520 ref1 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run1) 

1521 ref2 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-G"}, run=run2) 

1522 ref3 = butler.put(metric, datasetType, {"instrument": "Cam1", "physical_filter": "Cam1-R1"}, run=run1) 

1523 

1524 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1525 for ref, stored in many_stored.items(): 

1526 self.assertTrue(stored, f"Ref {ref} should be stored") 

1527 

1528 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1529 for ref, exists in many_exists.items(): 

1530 self.assertTrue(exists, f"Checking ref {ref} exists.") 

1531 self.assertEqual(exists, DatasetExistence.VERIFIED, f"Ref {ref} should be stored") 

1532 

1533 # Simple prune. 

1534 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 

1535 self.assertFalse(butler.exists(ref1.datasetType, ref1.dataId, collections=run1)) 

1536 

1537 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1538 for ref, stored in many_stored.items(): 

1539 self.assertFalse(stored, f"Ref {ref} should not be stored") 

1540 

1541 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1542 for ref, exists in many_exists.items(): 

1543 self.assertEqual(exists, DatasetExistence.UNRECOGNIZED, f"Ref {ref} should not be stored") 

1544 

1545 # Put data back. 

1546 ref1_new = butler.put(metric, ref1) 

1547 self.assertEqual(ref1_new, ref1) # Reuses original ID. 

1548 ref2 = butler.put(metric, ref2) 

1549 

1550 many_stored = butler.stored_many([ref1, ref2, ref3]) 

1551 self.assertTrue(many_stored[ref1]) 

1552 self.assertTrue(many_stored[ref2]) 

1553 self.assertFalse(many_stored[ref3]) 

1554 

1555 ref3 = butler.put(metric, ref3) 

1556 

1557 many_exists = butler._exists_many([ref1, ref2, ref3]) 

1558 for ref, exists in many_exists.items(): 

1559 self.assertTrue(exists, f"Ref {ref} should be stored") 

1560 

1561 # Clear out the datasets from registry and start again. 

1562 refs = [ref1, ref2, ref3] 

1563 butler.pruneDatasets(refs, purge=True, unstore=True) 

1564 for ref in refs: 

1565 butler.put(metric, ref) 

1566 

1567 # Confirm we can retrieve deferred. 

1568 dref1 = butler.getDeferred(ref1) # known and exists 

1569 metric1 = dref1.get() 

1570 self.assertEqual(metric1, metric) 

1571 

1572 # Test different forms of file availability. 

1573 # Need to be in a state where: 

1574 # - one ref just has registry record. 

1575 # - one ref has a missing file but a datastore record. 

1576 # - one ref has a missing datastore record but file is there. 

1577 # - one ref does not exist anywhere. 

1578 # Do not need to test a ref that has everything since that is tested 

1579 # above. 

1580 ref0 = DatasetRef( 

1581 datasetType, 

1582 DataCoordinate.standardize( 

1583 {"instrument": "Cam1", "physical_filter": "Cam1-G"}, universe=butler.dimensions 

1584 ), 

1585 run=run1, 

1586 ) 

1587 

1588 # Delete from datastore and retain in Registry. 

1589 butler.pruneDatasets([ref1], purge=False, unstore=True, disassociate=False) 

1590 

1591 # File has been removed. 

1592 uri2 = butler.getURI(ref2) 

1593 uri2.remove() 

1594 

1595 # Datastore has lost track. 

1596 butler._datastore.forget([ref3]) 

1597 

1598 # First test with a standard butler. 

1599 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1600 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1601 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1602 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1603 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED) 

1604 

1605 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=False) 

1606 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1607 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1608 self.assertEqual(exists_many[ref2], DatasetExistence.KNOWN) 

1609 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ASSUMED) 

1610 self.assertTrue(exists_many[ref2]) 

1611 

1612 # Check that per-ref query gives the same answer as many query. 

1613 for ref, exists in exists_many.items(): 

1614 self.assertEqual(butler.exists(ref, full_check=False), exists) 

1615 

1616 # Get deferred checks for existence before it allows it to be 

1617 # retrieved. 

1618 with self.assertRaises(LookupError): 

1619 butler.getDeferred(ref3) # not known, file exists 

1620 dref2 = butler.getDeferred(ref2) # known but file missing 

1621 with self.assertRaises(FileNotFoundError): 

1622 dref2.get() 

1623 

1624 # Test again with a trusting butler. 

1625 butler._datastore.trustGetRequest = True 

1626 exists_many = butler._exists_many([ref0, ref1, ref2, ref3], full_check=True) 

1627 self.assertEqual(exists_many[ref0], DatasetExistence.UNRECOGNIZED) 

1628 self.assertEqual(exists_many[ref1], DatasetExistence.RECORDED) 

1629 self.assertEqual(exists_many[ref2], DatasetExistence.RECORDED | DatasetExistence.DATASTORE) 

1630 self.assertEqual(exists_many[ref3], DatasetExistence.RECORDED | DatasetExistence._ARTIFACT) 

1631 

1632 # When trusting we can get a deferred dataset handle that is not 

1633 # known but does exist. 

1634 dref3 = butler.getDeferred(ref3) 

1635 metric3 = dref3.get() 

1636 self.assertEqual(metric3, metric) 

1637 

1638 # Check that per-ref query gives the same answer as many query. 

1639 for ref, exists in exists_many.items(): 

1640 self.assertEqual(butler.exists(ref, full_check=True), exists) 

1641 

1642 # Create a ref that surprisingly has the UUID of an existing ref 

1643 # but is not the same. 

1644 ref_bad = DatasetRef(datasetType, dataId=ref3.dataId, run=ref3.run, id=ref2.id) 

1645 with self.assertRaises(ValueError): 

1646 butler.exists(ref_bad) 

1647 

1648 # Create a ref that has a compatible storage class. 

1649 ref_compat = ref2.overrideStorageClass("StructuredDataDict") 

1650 exists = butler.exists(ref_compat) 

1651 self.assertEqual(exists, exists_many[ref2]) 

1652 

1653 # Remove everything and start from scratch. 

1654 butler._datastore.trustGetRequest = False 

1655 butler.pruneDatasets(refs, purge=True, unstore=True) 

1656 for ref in refs: 

1657 butler.put(metric, ref) 

1658 

1659 # These tests mess directly with the trash table and can leave the 

1660 # datastore in an odd state. Do them at the end. 

1661 # Check that in normal mode, deleting the record will lead to 

1662 # trash not touching the file. 

1663 uri1 = butler.getURI(ref1) 

1664 butler._datastore.bridge.moveToTrash([ref1], transaction=None) # Update the dataset_location table 

1665 butler._datastore.forget([ref1]) 

1666 butler._datastore.trash(ref1) 

1667 butler._datastore.emptyTrash() 

1668 self.assertTrue(uri1.exists()) 

1669 uri1.remove() # Clean it up. 

1670 

1671 # Simulate execution butler setup by deleting the datastore 

1672 # record but keeping the file around and trusting. 

1673 butler._datastore.trustGetRequest = True 

1674 uris = butler.get_many_uris([ref2, ref3]) 

1675 uri2 = uris[ref2].primaryURI 

1676 uri3 = uris[ref3].primaryURI 

1677 self.assertTrue(uri2.exists()) 

1678 self.assertTrue(uri3.exists()) 

1679 

1680 # Remove the datastore record. 

1681 butler._datastore.bridge.moveToTrash([ref2], transaction=None) # Update the dataset_location table 

1682 butler._datastore.forget([ref2]) 

1683 self.assertTrue(uri2.exists()) 

1684 butler._datastore.trash([ref2, ref3]) 

1685 # Immediate removal for ref2 file 

1686 self.assertFalse(uri2.exists()) 

1687 # But ref3 has to wait for the empty. 

1688 self.assertTrue(uri3.exists()) 

1689 butler._datastore.emptyTrash() 

1690 self.assertFalse(uri3.exists()) 

1691 

1692 # Clear out the datasets from registry. 

1693 butler.pruneDatasets([ref1, ref2, ref3], purge=True, unstore=True) 
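# Editor's note: DatasetExistence is a flag enum, so the states asserted
# above can also be decomposed facet by facet. A sketch:
state = DatasetExistence.RECORDED | DatasetExistence.DATASTORE
if state & DatasetExistence.RECORDED:
    ...  # the registry knows this ref
if not state & DatasetExistence._ARTIFACT:
    ...  # the file artifact itself has not been verified
# The composite KNOWN and VERIFIED states are the ones the assertions above
# treat as truthy, which is why a bare assertTrue(exists) suffices once full
# verification succeeds.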

1694 

1695 def testPytypeCoercion(self) -> None: 

1696 """Test python type coercion on Butler.get and put.""" 

1697 # Store some data with the normal example storage class. 

1698 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1699 datasetTypeName = "test_metric" 

1700 butler = self.runPutGetTest(storageClass, datasetTypeName) 

1701 

1702 dataId = {"instrument": "DummyCamComp", "visit": 423} 

1703 metric = butler.get(datasetTypeName, dataId=dataId) 

1704 self.assertEqual(get_full_type_name(metric), "lsst.daf.butler.tests.MetricsExample") 

1705 

1706 datasetType_ori = butler.registry.getDatasetType(datasetTypeName) 

1707 self.assertEqual(datasetType_ori.storageClass.name, "StructuredDataNoComponents") 

1708 

1709 # Now need to hack the registry dataset type definition. 

1710 # There is no API for this. 

1711 assert isinstance(butler._registry, SqlRegistry) 

1712 manager = butler._registry._managers.datasets 

1713 assert hasattr(manager, "_db") and hasattr(manager, "_static") 

1714 manager._db.update( 

1715 manager._static.dataset_type, 

1716 {"name": datasetTypeName}, 

1717 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataNoComponentsModel"}, 

1718 ) 

1719 

1720 # Force reset of dataset type cache 

1721 butler.registry.refresh() 

1722 

1723 datasetType_new = butler.registry.getDatasetType(datasetTypeName) 

1724 self.assertEqual(datasetType_new.name, datasetType_ori.name) 

1725 self.assertEqual(datasetType_new.storageClass.name, "StructuredDataNoComponentsModel") 

1726 

1727 metric_model = butler.get(datasetTypeName, dataId=dataId) 

1728 self.assertNotEqual(type(metric_model), type(metric)) 

1729 self.assertEqual(get_full_type_name(metric_model), "lsst.daf.butler.tests.MetricsExampleModel") 

1730 

1731 # Put the model and read it back to show that everything now 

1732 # works as normal. 

1733 metric_ref = butler.put(metric_model, datasetTypeName, dataId=dataId, visit=424) 

1734 metric_model_new = butler.get(metric_ref) 

1735 self.assertEqual(metric_model_new, metric_model) 

1736 

1737 # Hack the storage class again to something that will fail on the 

1738 # get with no conversion class. 

1739 manager._db.update( 

1740 manager._static.dataset_type, 

1741 {"name": datasetTypeName}, 

1742 {datasetTypeName: datasetTypeName, "storage_class": "StructuredDataListYaml"}, 

1743 ) 

1744 butler.registry.refresh() 

1745 

1746 with self.assertRaises(ValueError): 

1747 butler.get(datasetTypeName, dataId=dataId) 
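# Editor's note: outside of this deliberate registry hack, a caller can ask
# for a compatible in-memory type explicitly by overriding the storage class
# on the ref, as testPruneDatasets does. A sketch; it assumes a converter is
# registered between the two storage classes.
ref_as_dict = metric_ref.overrideStorageClass("StructuredDataDict")
metric_as_dict = butler.get(ref_as_dict)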

1748 

1749 

1750@unittest.skipUnless(testing is not None, "testing.postgresql module not found") 

1751class PostgresPosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1752 """PosixDatastore specialization of a butler using Postgres""" 

1753 

1754 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1755 fullConfigKey = ".datastore.formatters" 

1756 validationCanFail = True 

1757 datastoreStr = ["/tmp"] 

1758 datastoreName = [f"FileDatastore@{BUTLER_ROOT_TAG}"] 

1759 registryStr = "PostgreSQL@test" 

1760 postgresql: Any 

1761 

1762 @staticmethod 

1763 def _handler(postgresql: Any) -> None: 

1764 engine = sqlalchemy.engine.create_engine(postgresql.url()) 

1765 with engine.begin() as connection: 

1766 connection.execute(sqlalchemy.text("CREATE EXTENSION btree_gist;")) 

1767 

1768 @classmethod 

1769 def setUpClass(cls) -> None: 

1770 # Create the postgres test server. 

1771 cls.postgresql = testing.postgresql.PostgresqlFactory( 

1772 cache_initialized_db=True, on_initialized=cls._handler 

1773 ) 

1774 super().setUpClass() 

1775 

1776 @classmethod 

1777 def tearDownClass(cls) -> None: 

1778 # Clean up any lingering SQLAlchemy engines/connections 

1779 # so they're closed before we shut down the server. 

1780 gc.collect() 

1781 cls.postgresql.clear_cache() 

1782 super().tearDownClass() 

1783 

1784 def setUp(self) -> None: 

1785 self.server = self.postgresql() 

1786 

1787 # Need to add a registry section to the config. 

1788 self._temp_config = False 

1789 config = Config(self.configFile) 

1790 config["registry", "db"] = self.server.url() 

1791 with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh: 

1792 config.dump(fh) 

1793 self.configFile = fh.name 

1794 self._temp_config = True 

1795 super().setUp() 

1796 

1797 def tearDown(self) -> None: 

1798 self.server.stop() 

1799 if self._temp_config and os.path.exists(self.configFile): 

1800 os.remove(self.configFile) 

1801 super().tearDown() 

1802 

1803 def testMakeRepo(self) -> None: 

1804 # The base class test assumes that it's using sqlite and assumes 

1805 # the config file is acceptable to sqlite. 

1806 raise unittest.SkipTest("Postgres config is not compatible with this test.") 
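# Editor's note: the PostgresqlFactory above amortizes server start-up across
# tests via cache_initialized_db; the one-shot equivalent from
# testing.postgresql is simply:
import testing.postgresql

with testing.postgresql.Postgresql() as server:
    db_url = server.url()  # e.g. postgresql://postgres@127.0.0.1:<port>/test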

1807 

1808 

1809class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): 

1810 """InMemoryDatastore specialization of a butler""" 

1811 

1812 configFile = os.path.join(TESTDIR, "config/basic/butler-inmemory.yaml") 

1813 fullConfigKey = None 

1814 useTempRoot = False 

1815 validationCanFail = False 

1816 datastoreStr = ["datastore='InMemory"] 

1817 datastoreName = ["InMemoryDatastore@"] 

1818 registryStr = "/gen3.sqlite3" 

1819 

1820 def testIngest(self) -> None: 

1821 pass 

1822 

1823 

1824class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1825 """PosixDatastore specialization""" 

1826 

1827 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

1828 fullConfigKey = ".datastore.datastores.1.formatters" 

1829 validationCanFail = True 

1830 datastoreStr = ["datastore='InMemory", "/FileDatastore_1/,", "/FileDatastore_2/'"] 

1831 datastoreName = [ 

1832 "InMemoryDatastore@", 

1833 f"FileDatastore@{BUTLER_ROOT_TAG}/FileDatastore_1", 

1834 "SecondDatastore", 

1835 ] 

1836 registryStr = "/gen3.sqlite3" 

1837 

1838 

1839class ButlerExplicitRootTestCase(PosixDatastoreButlerTestCase): 

1840 """Test that a yaml file in one location can refer to a root in another.""" 

1841 

1842 datastoreStr = ["dir1"] 

1843 # Disable the makeRepo test since we are deliberately not using 

1844 # butler.yaml as the config name. 

1845 fullConfigKey = None 

1846 

1847 def setUp(self) -> None: 

1848 self.root = makeTestTempDir(TESTDIR) 

1849 

1850 # Make a new repository in one place 

1851 self.dir1 = os.path.join(self.root, "dir1") 

1852 Butler.makeRepo(self.dir1, config=Config(self.configFile)) 

1853 

1854 # Move the yaml file to a different place and add a "root" 

1855 self.dir2 = os.path.join(self.root, "dir2") 

1856 os.makedirs(self.dir2, exist_ok=True) 

1857 configFile1 = os.path.join(self.dir1, "butler.yaml") 

1858 config = Config(configFile1) 

1859 config["root"] = self.dir1 

1860 configFile2 = os.path.join(self.dir2, "butler2.yaml") 

1861 config.dumpToUri(configFile2) 

1862 os.remove(configFile1) 

1863 self.tmpConfigFile = configFile2 

1864 

1865 def testFileLocations(self) -> None: 

1866 self.assertNotEqual(self.dir1, self.dir2) 

1867 self.assertTrue(os.path.exists(os.path.join(self.dir2, "butler2.yaml"))) 

1868 self.assertFalse(os.path.exists(os.path.join(self.dir1, "butler.yaml"))) 

1869 self.assertTrue(os.path.exists(os.path.join(self.dir1, "gen3.sqlite3"))) 
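# Editor's note: the relocation performed in setUp() is only a config edit.
# A sketch with hypothetical paths:
config = Config("/repos/dir1/butler.yaml")
config["root"] = "/repos/dir1"  # record where the repository really lives
config.dumpToUri("/elsewhere/butler2.yaml")
relocated = Butler("/elsewhere/butler2.yaml", writeable=False)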

1870 

1871 

1872class ButlerMakeRepoOutfileTestCase(ButlerPutGetTests, unittest.TestCase): 

1873 """Test that a config file created by makeRepo outside of repo works.""" 

1874 

1875 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1876 

1877 def setUp(self) -> None: 

1878 self.root = makeTestTempDir(TESTDIR) 

1879 self.root2 = makeTestTempDir(TESTDIR) 

1880 

1881 self.tmpConfigFile = os.path.join(self.root2, "different.yaml") 

1882 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1883 

1884 def tearDown(self) -> None: 

1885 if os.path.exists(self.root2): 

1886 shutil.rmtree(self.root2, ignore_errors=True) 

1887 super().tearDown() 

1888 

1889 def testConfigExistence(self) -> None: 

1890 c = Config(self.tmpConfigFile) 

1891 uri_config = ResourcePath(c["root"]) 

1892 uri_expected = ResourcePath(self.root, forceDirectory=True) 

1893 self.assertEqual(uri_config.geturl(), uri_expected.geturl()) 

1894 self.assertNotIn(":", uri_config.path, "Check for URI concatenated with normal path") 

1895 

1896 def testPutGet(self) -> None: 

1897 storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") 

1898 self.runPutGetTest(storageClass, "test_metric") 

1899 

1900 

1901class ButlerMakeRepoOutfileDirTestCase(ButlerMakeRepoOutfileTestCase): 

1902 """Test that a config file created by makeRepo outside of repo works.""" 

1903 

1904 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1905 

1906 def setUp(self) -> None: 

1907 self.root = makeTestTempDir(TESTDIR) 

1908 self.root2 = makeTestTempDir(TESTDIR) 

1909 

1910 self.tmpConfigFile = self.root2 

1911 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1912 

1913 def testConfigExistence(self) -> None: 

1914 # Append the yaml file else Config constructor does not know the file 

1915 # type. 

1916 self.tmpConfigFile = os.path.join(self.tmpConfigFile, "butler.yaml") 

1917 super().testConfigExistence() 

1918 

1919 

1920class ButlerMakeRepoOutfileUriTestCase(ButlerMakeRepoOutfileTestCase): 

1921 """Test that a config file created by makeRepo outside of repo works.""" 

1922 

1923 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1924 

1925 def setUp(self) -> None: 

1926 self.root = makeTestTempDir(TESTDIR) 

1927 self.root2 = makeTestTempDir(TESTDIR) 

1928 

1929 self.tmpConfigFile = ResourcePath(os.path.join(self.root2, "something.yaml")).geturl() 

1930 Butler.makeRepo(self.root, config=Config(self.configFile), outfile=self.tmpConfigFile) 

1931 

1932 

1933@unittest.skipIf(not boto3, "Warning: boto3 AWS SDK not found!") 

1934class S3DatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): 

1935 """S3Datastore specialization of a butler; an S3 storage Datastore + 

1936 a local in-memory SqlRegistry. 

1937 """ 

1938 

1939 configFile = os.path.join(TESTDIR, "config/basic/butler-s3store.yaml") 

1940 fullConfigKey = None 

1941 validationCanFail = True 

1942 

1943 bucketName = "anybucketname" 

1944 """Name of the Bucket that will be used in the tests. The name is read from 

1945 the config file used with the tests during set-up. 

1946 """ 

1947 

1948 root = "butlerRoot/" 

1949 """Root repository directory expected to be used in case useTempRoot=False. 

1950 Otherwise the root is set to a randomly generated 20-character string 

1951 during set-up. 

1952 """ 

1953 

1954 datastoreStr = [f"datastore={root}"] 

1955 """Contains all expected root locations in a format expected to be 

1956 returned by Butler stringification. 

1957 """ 

1958 

1959 datastoreName = ["FileDatastore@s3://{bucketName}/{root}"] 

1960 """The expected format of the S3 Datastore string.""" 

1961 

1962 registryStr = "/gen3.sqlite3" 

1963 """Expected format of the Registry string.""" 

1964 

1965 mock_s3 = mock_s3() 

1966 """The mocked s3 interface from moto.""" 

1967 

1968 def genRoot(self) -> str: 

1969 """Return a random string of len 20 to serve as a root 

1970 name for the temporary bucket repo. 

1971 

1972 This plays the same role as tempfile.mkdtemp, since this is what self.root 

1973 becomes when useTempRoot is True. 

1974 """ 

1975 rndstr = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)) 

1976 return rndstr + "/" 

1977 

1978 def setUp(self) -> None: 

1979 config = Config(self.configFile) 

1980 uri = ResourcePath(config[".datastore.datastore.root"]) 

1981 self.bucketName = uri.netloc 

1982 

1983 # Enable S3 mocking of tests. 

1984 self.mock_s3.start() 

1985 

1986 # set up some fake credentials if they do not exist 

1987 self.usingDummyCredentials = setAwsEnvCredentials() 

1988 

1989 if self.useTempRoot: 

1990 self.root = self.genRoot() 

1991 rooturi = f"s3://{self.bucketName}/{self.root}" 

1992 config.update({"datastore": {"datastore": {"root": rooturi}}}) 

1993 

1994 # Need a local folder to store the registry database. 

1995 self.reg_dir = makeTestTempDir(TESTDIR) 

1996 config["registry", "db"] = f"sqlite:///{self.reg_dir}/gen3.sqlite3" 

1997 

1998 # Moto needs to know that we expect the bucket self.bucketName to exist 

1999 # (this used to be the class attribute bucketName) 

2000 s3 = boto3.resource("s3") 

2001 s3.create_bucket(Bucket=self.bucketName) 

2002 

2003 self.datastoreStr = [f"datastore='{rooturi}'"] 

2004 self.datastoreName = [f"FileDatastore@{rooturi}"] 

2005 Butler.makeRepo(rooturi, config=config, forceConfigRoot=False) 

2006 self.tmpConfigFile = posixpath.join(rooturi, "butler.yaml") 

2007 

2008 def tearDown(self) -> None: 

2009 s3 = boto3.resource("s3") 

2010 bucket = s3.Bucket(self.bucketName) 

2011 try: 

2012 bucket.objects.all().delete() 

2013 except botocore.exceptions.ClientError as e: 

2014 if e.response["Error"]["Code"] == "404": 

2015 # the key was not reachable - pass 

2016 pass 

2017 else: 

2018 raise 

2019 

2020 bucket = s3.Bucket(self.bucketName) 

2021 bucket.delete() 

2022 

2023 # Stop the S3 mock. 

2024 self.mock_s3.stop() 

2025 

2026 # unset any potentially set dummy credentials 

2027 if self.usingDummyCredentials: 

2028 unsetAwsEnvCredentials() 

2029 

2030 if self.reg_dir is not None and os.path.exists(self.reg_dir): 

2031 shutil.rmtree(self.reg_dir, ignore_errors=True) 

2032 

2033 if self.useTempRoot and os.path.exists(self.root): 

2034 shutil.rmtree(self.root, ignore_errors=True) 

2035 

2036 super().tearDown() 
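# Editor's note: a self-contained sketch of the moto pattern this class
# relies on (bucket name hypothetical). Everything inside the decorated
# function talks to moto's in-process fake S3 rather than real AWS.
import boto3
from moto import mock_s3

@mock_s3
def _sketch_make_bucket() -> None:
    s3 = boto3.resource("s3")
    s3.create_bucket(Bucket="example-bucket")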

2037 

2038 

2039class PosixDatastoreTransfers(unittest.TestCase): 

2040 """Test data transfers between butlers. 

2041 

2042 Test for different managers. UUID to UUID and integer to integer are 

2043 tested. UUID to integer is not supported since we do not currently 

2044 want to allow that. Integer to UUID is supported with the caveat 

2045 that UUID4 will be generated and this will be incorrect for raw 

2046 dataset types. The test ignores that. 

2047 """ 

2048 

2049 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2050 storageClassFactory: StorageClassFactory 

2051 

2052 @classmethod 

2053 def setUpClass(cls) -> None: 

2054 cls.storageClassFactory = StorageClassFactory() 

2055 cls.storageClassFactory.addFromConfig(cls.configFile) 

2056 

2057 def setUp(self) -> None: 

2058 self.root = makeTestTempDir(TESTDIR) 

2059 self.config = Config(self.configFile) 

2060 

2061 def tearDown(self) -> None: 

2062 removeTestTempDir(self.root) 

2063 

2064 def create_butler(self, manager: str, label: str) -> Butler: 

2065 config = Config(self.configFile) 

2066 config["registry", "managers", "datasets"] = manager 

2067 return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) 

2068 

2069 def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None: 

2070 default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID" 

2071 if manager1 is None: 

2072 manager1 = default 

2073 if manager2 is None: 

2074 manager2 = default 

2075 self.source_butler = self.create_butler(manager1, "1") 

2076 self.target_butler = self.create_butler(manager2, "2") 

2077 

2078 def testTransferUuidToUuid(self) -> None: 

2079 self.create_butlers() 

2080 self.assertButlerTransfers() 

2081 

2082 def _enable_trust(self, datastore: Datastore) -> None: 

2083 datastores = getattr(datastore, "datastores", [datastore]) 

2084 for this_datastore in datastores: 

2085 if hasattr(this_datastore, "trustGetRequest"): 

2086 this_datastore.trustGetRequest = True 

2087 

2088 def testTransferMissing(self) -> None: 

2089 """Test transfers where datastore records are missing. 

2090 

2091 This is how execution butler works. 

2092 """ 

2093 self.create_butlers() 

2094 

2095 # Configure the source butler to allow trust. 

2096 self._enable_trust(self.source_butler._datastore) 

2097 

2098 self.assertButlerTransfers(purge=True) 

2099 

2100 def testTransferMissingDisassembly(self) -> None: 

2101 """Test transfers where datastore records are missing. 

2102 

2103 This is how execution butler works. 

2104 """ 

2105 self.create_butlers() 

2106 

2107 # Configure the source butler to allow trust. 

2108 self._enable_trust(self.source_butler._datastore) 

2109 

2110 # Test disassembly. 

2111 self.assertButlerTransfers(purge=True, storageClassName="StructuredComposite") 

2112 

2113 def testAbsoluteURITransferDirect(self) -> None: 

2114 """Test transfer using an absolute URI.""" 

2115 self._absolute_transfer("auto") 

2116 

2117 def testAbsoluteURITransferCopy(self) -> None: 

2118 """Test transfer using an absolute URI.""" 

2119 self._absolute_transfer("copy") 

2120 

2121 def _absolute_transfer(self, transfer: str) -> None: 

2122 self.create_butlers() 

2123 

2124 storageClassName = "StructuredData" 

2125 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2126 datasetTypeName = "random_data" 

2127 run = "run1" 

2128 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2129 

2130 dimensions = self.source_butler.dimensions.extract(()) 

2131 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2132 self.source_butler.registry.registerDatasetType(datasetType) 

2133 

2134 metrics = makeExampleMetrics() 

2135 with ResourcePath.temporary_uri(suffix=".json") as temp: 

2136 dataId = DataCoordinate.makeEmpty(self.source_butler.dimensions) 

2137 source_refs = [DatasetRef(datasetType, dataId, run=run)] 

2138 temp.write(json.dumps(metrics.exportAsDict()).encode()) 

2139 dataset = FileDataset(path=temp, refs=source_refs) 

2140 self.source_butler.ingest(dataset, transfer="direct") 

2141 

2142 self.target_butler.transfer_from( 

2143 self.source_butler, dataset.refs, register_dataset_types=True, transfer=transfer 

2144 ) 

2145 

2146 uri = self.target_butler.getURI(dataset.refs[0]) 

2147 if transfer == "auto": 

2148 self.assertEqual(uri, temp) 

2149 else: 

2150 self.assertNotEqual(uri, temp) 

2151 

2152 def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "StructuredData") -> None: 

2153 """Test that a run can be transferred to another butler.""" 

2154 storageClass = self.storageClassFactory.getStorageClass(storageClassName) 

2155 datasetTypeName = "random_data" 

2156 

2157 # Test will create 3 collections and we will want to transfer 

2158 # two of those three. 

2159 runs = ["run1", "run2", "other"] 

2160 

2161 # Also want to use two different dataset types to ensure that 

2162 # grouping works. 

2163 datasetTypeNames = ["random_data", "random_data_2"] 

2164 

2165 # Create the run collections in the source butler. 

2166 for run in runs: 

2167 self.source_butler.registry.registerCollection(run, CollectionType.RUN) 

2168 

2169 # Create dimensions in source butler. 

2170 n_exposures = 30 

2171 self.source_butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) 

2172 self.source_butler.registry.insertDimensionData( 

2173 "physical_filter", {"instrument": "DummyCamComp", "name": "d-r", "band": "R"} 

2174 ) 

2175 self.source_butler.registry.insertDimensionData( 

2176 "detector", {"instrument": "DummyCamComp", "id": 1, "full_name": "det1"} 

2177 ) 

2178 

2179 for i in range(n_exposures): 

2180 self.source_butler.registry.insertDimensionData( 

2181 "exposure", 

2182 {"instrument": "DummyCamComp", "id": i, "obs_id": f"exp{i}", "physical_filter": "d-r"}, 

2183 ) 

2184 

2185 # Create dataset types in the source butler. 

2186 dimensions = self.source_butler.dimensions.extract(["instrument", "exposure"]) 

2187 for datasetTypeName in datasetTypeNames: 

2188 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2189 self.source_butler.registry.registerDatasetType(datasetType) 

2190 

2191 # Write a dataset to an unrelated run -- this will ensure that 

2192 # we are rewriting integer dataset ids in the target if necessary. 

2193 # Will not be relevant for UUID. 

2194 run = "distraction" 

2195 butler = Butler(butler=self.source_butler, run=run) 

2196 butler.put( 

2197 makeExampleMetrics(), 

2198 datasetTypeName, 

2199 exposure=1, 

2200 instrument="DummyCamComp", 

2201 physical_filter="d-r", 

2202 ) 

2203 

2204 # Write some example metrics to the source 

2205 butler = Butler(butler=self.source_butler) 

2206 

2207 # Set of DatasetRefs that should be in the list of refs to transfer 

2208 # but which will not be transferred. 

2209 deleted: set[DatasetRef] = set() 

2210 

2211 n_expected = 20 # Number of datasets expected to be transferred 

2212 source_refs = [] 

2213 for i in range(n_exposures): 

2214 # Put a third of datasets into each collection, only retain 

2215 # two thirds. 

2216 index = i % 3 

2217 run = runs[index] 

2218 datasetTypeName = datasetTypeNames[i % 2] 

2219 

2220 metric = MetricsExample( 

2221 summary={"counter": i}, output={"text": "metric"}, data=[2 * x for x in range(i)] 

2222 ) 

2223 dataId = {"exposure": i, "instrument": "DummyCamComp", "physical_filter": "d-r"} 

2224 ref = butler.put(metric, datasetTypeName, dataId=dataId, run=run) 

2225 

2226 # Remove the datastore record using low-level API, but only 

2227 # for a specific index. 

2228 if purge and index == 1: 

2229 # For one of these delete the file as well. 

2230 # This allows the "missing" code to filter the 

2231 # file out. 

2232 # Access the individual datastores. 

2233 datastores = [] 

2234 if hasattr(butler._datastore, "datastores"): 

2235 datastores.extend(butler._datastore.datastores) 

2236 else: 

2237 datastores.append(butler._datastore) 

2238 

2239 if not deleted: 

2240 # For a chained datastore we need to remove 

2241 # files in each chain. 

2242 for datastore in datastores: 

2243 # The file might not be known to the datastore 

2244 # if constraints are used. 

2245 try: 

2246 primary, uris = datastore.getURIs(ref) 

2247 except FileNotFoundError: 

2248 continue 

2249 if primary and primary.scheme != "mem": 

2250 primary.remove() 

2251 for uri in uris.values(): 

2252 if uri.scheme != "mem": 

2253 uri.remove() 

2254 n_expected -= 1 

2255 deleted.add(ref) 

2256 

2257 # Remove the datastore record. 

2258 for datastore in datastores: 

2259 if hasattr(datastore, "removeStoredItemInfo"): 

2260 datastore.removeStoredItemInfo(ref) 

2261 

2262 if index < 2: 

2263 source_refs.append(ref) 

2264 if ref not in deleted: 

2265 new_metric = butler.get(ref) 

2266 self.assertEqual(new_metric, metric) 

2267 

2268 # Create some bad dataset types to ensure we check for inconsistent 

2269 # definitions. 

2270 badStorageClass = self.storageClassFactory.getStorageClass("StructuredDataList") 

2271 for datasetTypeName in datasetTypeNames: 

2272 datasetType = DatasetType(datasetTypeName, dimensions, badStorageClass) 

2273 self.target_butler.registry.registerDatasetType(datasetType) 

2274 with self.assertRaises(ConflictingDefinitionError) as cm: 

2275 self.target_butler.transfer_from(self.source_butler, source_refs) 

2276 self.assertIn("dataset type differs", str(cm.exception)) 

2277 

2278 # And remove the bad definitions. 

2279 for datasetTypeName in datasetTypeNames: 

2280 self.target_butler.registry.removeDatasetType(datasetTypeName) 

2281 

2282 # Transfer without creating dataset types should fail. 

2283 with self.assertRaises(KeyError): 

2284 self.target_butler.transfer_from(self.source_butler, source_refs) 

2285 

2286 # Transfer without creating dimensions should fail. 

2287 with self.assertRaises(ConflictingDefinitionError) as cm: 

2288 self.target_butler.transfer_from(self.source_butler, source_refs, register_dataset_types=True) 

2289 self.assertIn("dimension", str(cm.exception)) 

2290 

2291 # The failed transfer above leaves registry in an inconsistent 

2292 # state because the run is created but then rolled back without 

2293 # the collection cache being cleared. For now force a refresh. 

2294 # Can remove with DM-35498. 

2295 self.target_butler.registry.refresh() 

2296 

2297 # Now transfer them to the second butler, including dimensions. 

2298 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2299 transferred = self.target_butler.transfer_from( 

2300 self.source_butler, 

2301 source_refs, 

2302 register_dataset_types=True, 

2303 transfer_dimensions=True, 

2304 ) 

2305 self.assertEqual(len(transferred), n_expected) 

2306 log_output = ";".join(log_cm.output) 

2307 

2308 # A ChainedDatastore will use the in-memory datastore for mexists 

2309 # so we can not rely on the mexists log message. 

2310 self.assertIn("Number of datastore records found in source", log_output) 

2311 self.assertIn("Creating output run", log_output) 

2312 

2313 # Do the transfer twice to ensure that it will do nothing extra. 

2314 # Only do this if purge=True because it does not work for int 

2315 # dataset_id. 

2316 if purge: 

2317 # This should not need to register dataset types. 

2318 transferred = self.target_butler.transfer_from(self.source_butler, source_refs) 

2319 self.assertEqual(len(transferred), n_expected) 

2320 

2321 # Also do an explicit low-level transfer to trigger some 

2322 # edge cases. 

2323 with self.assertLogs(level=logging.DEBUG) as log_cm: 

2324 self.target_butler._datastore.transfer_from(self.source_butler._datastore, source_refs) 

2325 log_output = ";".join(log_cm.output) 

2326 self.assertIn("no file artifacts exist", log_output) 

2327 

2328 with self.assertRaises((TypeError, AttributeError)): 

2329 self.target_butler._datastore.transfer_from(self.source_butler, source_refs) # type: ignore 

2330 

2331 with self.assertRaises(ValueError): 

2332 self.target_butler._datastore.transfer_from( 

2333 self.source_butler._datastore, source_refs, transfer="split" 

2334 ) 

2335 

2336 # Now try to get the same refs from the new butler. 

2337 for ref in source_refs: 

2338 if ref not in deleted: 

2339 new_metric = self.target_butler.get(ref) 

2340 old_metric = self.source_butler.get(ref) 

2341 self.assertEqual(new_metric, old_metric) 

2342 

2343 # Now prune run2 collection and create instead a CHAINED collection. 

2344 # This should block the transfer. 

2345 self.target_butler.removeRuns(["run2"], unstore=True) 

2346 self.target_butler.registry.registerCollection("run2", CollectionType.CHAINED) 

2347 with self.assertRaises(CollectionTypeError): 

2348 # Re-importing the run1 datasets can be problematic if they 

2349 # use integer IDs so filter those out. 

2350 to_transfer = [ref for ref in source_refs if ref.run == "run2"] 

2351 self.target_butler.transfer_from(self.source_butler, to_transfer) 
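# Editor's note: the essential call exercised throughout this class, reduced
# to its key arguments (the butler variables and refs are hypothetical):
transferred = target_butler.transfer_from(
    source_butler,
    refs,
    transfer="copy",
    register_dataset_types=True,  # create missing dataset types in the target
    transfer_dimensions=True,  # copy the needed dimension records as well
)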

2352 

2353 

2354class ChainedDatastoreTransfers(PosixDatastoreTransfers): 

2355 """Test transfers using a chained datastore.""" 

2356 

2357 configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml") 

2358 

2359 

2360class NullDatastoreTestCase(unittest.TestCase): 

2361 """Test that we can fall back to a null datastore.""" 

2362 

2363 # Need a good config to create the repo. 

2364 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

2365 storageClassFactory: StorageClassFactory 

2366 

2367 @classmethod 

2368 def setUpClass(cls) -> None: 

2369 cls.storageClassFactory = StorageClassFactory() 

2370 cls.storageClassFactory.addFromConfig(cls.configFile) 

2371 

2372 def setUp(self) -> None: 

2373 """Create a new butler root for each test.""" 

2374 self.root = makeTestTempDir(TESTDIR) 

2375 Butler.makeRepo(self.root, config=Config(self.configFile)) 

2376 

2377 def tearDown(self) -> None: 

2378 removeTestTempDir(self.root) 

2379 

2380 def test_fallback(self) -> None: 

2381 # Read the butler config and mess with the datastore section. 

2382 bad_config = Config(os.path.join(self.root, "butler.yaml")) 

2383 bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore" 

2384 

2385 with self.assertRaises(RuntimeError): 

2386 Butler(bad_config) 

2387 

2388 butler = Butler(bad_config, writeable=True, without_datastore=True) 

2389 self.assertIsInstance(butler._datastore, NullDatastore) 

2390 

2391 # Check that registry is working. 

2392 butler.registry.registerRun("MYRUN") 

2393 collections = butler.registry.queryCollections(...) 

2394 self.assertIn("MYRUN", set(collections)) 

2395 

2396 # Create a ref. 

2397 dimensions = butler.dimensions.extract([]) 

2398 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

2399 datasetTypeName = "metric" 

2400 datasetType = DatasetType(datasetTypeName, dimensions, storageClass) 

2401 butler.registry.registerDatasetType(datasetType) 

2402 ref = DatasetRef(datasetType, {}, run="MYRUN") 

2403 

2404 # Check that datastore will complain. 

2405 with self.assertRaises(FileNotFoundError): 

2406 butler.get(ref) 

2407 with self.assertRaises(FileNotFoundError): 

2408 butler.getURI(ref) 
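# Editor's note: a registry-only connection is handy for metadata queries
# when artifacts are unreachable. A sketch with a hypothetical repo path:
registry_only = Butler("some_repo", without_datastore=True)
collections = list(registry_only.registry.queryCollections(...))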

2409 

2410 

2411def setup_module(module: types.ModuleType) -> None: 

2412 """Set up the module for pytest.""" 

2413 clean_environment() 

2414 

2415 

2416if __name__ == "__main__": 

2417 clean_environment() 

2418 unittest.main()