# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import contextlib
import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable, Iterator
from typing import Any, cast

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DimensionUniverse,
    FileDataset,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.datastore import DatasetRefURIs, DatastoreConfig, DatastoreValidationError, NullDatastore
from lsst.daf.butler.datastore.cache_manager import (
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreDisabledCacheManager,
)
from lsst.daf.butler.datastore.stored_file_info import StoredFileInfo
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    """Make an example dataset that can be stored in the butler."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},  # summary
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},  # output
        array,  # data
    )


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing"""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False
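    # The capability flags above are overridden by concrete subclasses, which
    # also supply further attributes used below (e.g. uriScheme,
    # ingestTransferModes, canIngestNoTransferAuto).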

    def testConfigRoot(self) -> None:
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self) -> None:
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {
            "instrument": "dummy",
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {
            "instrument": "dummy",
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self) -> None:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {
            "instrument": "dummy",
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }
        dataId2 = {
            "instrument": "dummy",
            "visit": 53,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {
            "instrument": "dummy",
            "visit": 54,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that registry knows nothing about."""
        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.conform(("visit", "physical_filter"))

            dataId = {
                "instrument": "dummy",
                "visit": 52 + i,
                "physical_filter": "V",
                "band": "v",
                "day_obs": 20250101,
            }

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the datastore record so that the dataset can only be
            # found again via trust mode.
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make new dataset ref with compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str, ref: DatasetRef = ref) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name!r}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)
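                # With the callback registered, the datastore can map a stored
                # file back to its original dataset type, so the
                # compatible-storage-class reads below can succeed.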

                # Storage class override with original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self) -> None:
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore | DatasetRef, ...]:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.conform(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {
                "instrument": "dummy",
                "visit": 638 + i,
                "physical_filter": "U",
                "band": "u",
                "day_obs": 20250101,
            }
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        # The refs are splatted into the returned tuple so that callers can
        # unpack directly, e.g. ``datastore, ref = self.prepDeleteTest()``.
        return datastore, *refs

    def testRemove(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self) -> None:
        metrics = makeExampleMetrics()

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {
            "instrument": "dummy",
            "visit": 2048,
            "physical_filter": "Uprime",
            "band": "u",
            "day_obs": 20250101,
        }

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v", "day_obs": 20250101}
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v", "day_obs": 20250101}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {
                    "instrument": "dummy",
                    "visit": 1,
                    "physical_filter": "V",
                    "band": "v",
                    "day_obs": 20250101,
                }
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {
                        "instrument": "dummy",
                        "visit": 2,
                        "physical_filter": "V",
                        "band": "v",
                        "day_obs": 20250101,
                    }
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v", "day_obs": 20250101}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(self, func: Callable[[MetricsExample, str, DatasetRef], None]) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with _temp_yaml_file(metrics._asdict()) as path:
            func(metrics, path, ref)

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue
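
            # The nested helpers below bind ``mode`` and ``datastore`` as
            # default arguments so that each closure captures the values of
            # the current subTest iteration rather than the loop variables.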

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file already in the datastore root."""
                    # First copy it into the root, and adjust the path
                    # accordingly.
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files outside of datastore root unless
                    auto.
                    """
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file by transferring it to the template
                    location.
                    """
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))
                    file_exists = os.path.exists(path)
                    if mode == "move":
                        self.assertFalse(file_exists)
                    else:
                        self.assertTrue(file_exists)

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with _temp_yaml_file(metrics._asdict()) as realpath:
                with tempfile.TemporaryDirectory() as tmpdir:
                    sympath = os.path.join(tmpdir, "symlink.yaml")
                    os.symlink(os.path.realpath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertTrue(os.path.samefile(linkTarget, realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store.
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test;
        # ChainedDatastores that include only InMemoryDatastores have to be
        # skipped as well. The ``else`` clause below only runs when the loop
        # finds no non-in-memory datastore in the chain.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.conform(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {
                "instrument": "dummy",
                "visit": visit,
                "physical_filter": "Uprime",
                "band": "u",
                "day_obs": 20250101,
            }
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self) -> None:
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works, include non-existing dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {
            "instrument": "dummy",
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.make_empty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with dict()
        constructor.
        """
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
        """Put the same object under two storage class variants and check
        that both reads return equal data.
        """
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.make_empty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self) -> None:
        # The call to os.path.realpath is necessary because Mac temporary files
        # can end up in either /private/var/folders or /var/folders, which
        # refer to the same location but don't appear to.
        # This matters for "relsymlink" transfer mode, because it needs to be
        # able to read the file through a relative symlink, but some of the
        # intermediate directories are not traversable if you try to get from a
        # tempfile in /var/folders to one in /private/var/folders via a
        # relative path.
        self.root = os.path.realpath(self.enterContext(tempfile.TemporaryDirectory()))
        super().setUp()

    def testAtomicWrite(self) -> None:
        """Test that we write to a temporary and then rename"""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v", "day_obs": 20250101}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self) -> None:
        """Verify that the expected exception is raised if the FileDatastore
        can not determine the put formatter location.
        """
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {
            "instrument": "dummy",
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId)

        def raiser(ref: DatasetRef) -> None:
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # Verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # Verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)
1160 def test_roots(self): 

1161 datastore = self.makeDatastore() 

1162 

1163 self.assertEqual(set(datastore.names), set(datastore.roots.keys())) 

1164 for root in datastore.roots.values(): 

1165 if root is not None: 

1166 self.assertTrue(root.exists()) 


    def test_prepare_get_for_external_client(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        # Most of the coverage for this function is in test_server.py,
        # because it requires a file backend that supports URL signing.
        self.assertIsNone(datastore.prepare_get_for_external_client(ref))


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self) -> None:
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v", "day_obs": 20250101}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)

    def test_repeat_ingest(self):
        """Test that repeatedly ingesting the same file in direct mode
        is allowed.

        This test can only run with FileDatastore since that is the only
        datastore supporting "direct" ingest.
        """
        metrics, v4ref = self._prepareIngestTest()
        datastore = self.makeDatastore()
        v5ref = DatasetRef(
            v4ref.datasetType, v4ref.dataId, v4ref.run, id_generation_mode=DatasetIdGenEnum.DATAID_TYPE_RUN
        )

        with _temp_yaml_file(metrics._asdict()) as path:
            datastore.ingest(FileDataset(path=path, refs=v4ref), transfer="direct")

            # This will fail because the ref is using UUIDv4.
            with self.assertRaises(RuntimeError):
                datastore.ingest(FileDataset(path=path, refs=v4ref), transfer="direct")

            # UUIDv5 can be repeatedly ingested in direct mode.
            datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="direct")
            datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="direct")
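            # DATAID_TYPE_RUN produces a deterministic (UUIDv5) dataset ID,
            # which is presumably why the repeat direct ingest of the
            # identical file can be treated as benign.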

            with self.assertRaises(RuntimeError):
                datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="copy")
1234 

1235class TrashDatastoreTestCase(PosixDatastoreTestCase): 

1236 """Restrict trash test to FileDatastore.""" 

1237 

1238 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 


    def testTrash(self) -> None:
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record and trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Test datastore cleans up on failure."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp()
        super().setUp()

    def testCleanup(self) -> None:
        """Test that a failed formatter write does cleanup a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {
            "instrument": "dummy",
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "day_obs": 20250101,
        }

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails and leaves
        # a file behind.
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(RuntimeError):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self) -> None:
        """Test constraints model. Assumes that each test class has the
        same constraints.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.conform(("visit", "physical_filter", "instrument"))
        dataId = {
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "instrument": "DummyCamComp",
            "day_obs": 20250101,
        }

        # Write empty files suitable for the ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))
1428 

1429class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase): 

1430 """PosixDatastore specialization""" 

1431 

1432 configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml") 

1433 canIngest = True 

1434 

1435 def setUp(self) -> None: 

1436 # Override the working directory before calling the base class 

1437 self.root = tempfile.mkdtemp() 

1438 super().setUp() 

1439 

1440 

1441class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase): 

1442 """InMemoryDatastore specialization.""" 

1443 

1444 configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml") 

1445 canIngest = False 

1446 

1447 

1448class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase): 

1449 """ChainedDatastore specialization using a POSIXDatastore and constraints 

1450 at the ChainedDatstore. 

1451 """ 

1452 

1453 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml") 

1454 

1455 

1456class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase): 

1457 """ChainedDatastore specialization using a POSIXDatastore.""" 

1458 

1459 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml") 

1460 

1461 

1462class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase): 

1463 """ChainedDatastore specialization using all InMemoryDatastore.""" 

1464 

1465 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml") 

1466 canIngest = False 

1467 

1468 

1469class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase): 

1470 """Test that a chained datastore can control constraints per-datastore 

1471 even if child datastore would accept. 

1472 """ 

1473 

1474 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml") 

1475 

1476 def setUp(self) -> None: 

1477 # Override the working directory before calling the base class 

1478 self.root = tempfile.mkdtemp() 

1479 super().setUp() 


    def testConstraints(self) -> None:
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.conform(("visit", "physical_filter", "instrument"))
        dataId1 = {
            "visit": 52,
            "physical_filter": "V",
            "band": "v",
            "instrument": "DummyCamComp",
            "day_obs": 20250101,
        }
        dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC", "day_obs": 20250101}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

1501 

1502 for typeName, dataId, sc, accept, ingest in ( 

1503 ("metric", dataId1, sc1, (False, True, False), True), 

1504 ("metric5", dataId1, sc1, (False, False, False), False), 

1505 ("metric5", dataId2, sc1, (True, False, False), False), 

1506 ("metric33", dataId2, sc2, (True, True, False), True), 

1507 ("metric5", dataId1, sc2, (False, True, False), True), 

1508 ): 

1509 # Choose a different temp file depending on the StorageClass.

1510 testfile = testfile_j if sc.name.endswith("Json") else testfile_y 

1511 

1512 with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name): 

1513 ref = self.makeDatasetRef(typeName, dimensions, sc, dataId) 

1514 if any(accept): 

1515 datastore.put(metrics, ref) 

1516 self.assertTrue(datastore.exists(ref)) 

1517 

1518 # Check each datastore inside the chained datastore 

1519 for childDatastore, expected in zip(datastore.datastores, accept, strict=True): 

1520 self.assertEqual( 

1521 childDatastore.exists(ref), 

1522 expected, 

1523 f"Testing presence of {ref} in datastore {childDatastore.name}", 

1524 ) 

1525 

1526 datastore.remove(ref) 

1527 

1528 # Check that ingest works 

1529 if ingest: 

1530 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1531 self.assertTrue(datastore.exists(ref)) 

1532 

1533 # Check each datastore inside the chained datastore 

1534 for childDatastore, expected in zip(datastore.datastores, accept, strict=True): 

1535 # Ephemeral datastores currently mean InMemoryDatastore,

1536 # which does not accept ingest of files.

1537 if childDatastore.isEphemeral: 

1538 expected = False 

1539 self.assertEqual( 

1540 childDatastore.exists(ref), 

1541 expected, 

1542 f"Testing presence of ingested {ref} in datastore {childDatastore.name}", 

1543 ) 

1544 

1545 datastore.remove(ref) 

1546 else: 

1547 with self.assertRaises(DatasetTypeNotSupportedError): 

1548 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1549 

1550 else: 

1551 with self.assertRaises(DatasetTypeNotSupportedError): 

1552 datastore.put(metrics, ref) 

1553 self.assertFalse(datastore.exists(ref)) 

1554 

1555 # Again with ingest 

1556 with self.assertRaises(DatasetTypeNotSupportedError): 

1557 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1558 self.assertFalse(datastore.exists(ref)) 

1559 

1560 
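# A standalone sketch (hypothetical names, not the ChainedDatastore API) of
# the acceptance model tested above: the chain accepts a put() when any child
# accepts, and each child only holds datasets its own constraints allow,
# mirroring the per-child `accept` tuples in the table-driven loop.
class ToyChild:
    def __init__(self, allowed: set[str]) -> None:
        self.allowed = allowed
        self.contents: dict[str, object] = {}

    def accepts(self, name: str) -> bool:
        return name in self.allowed


class ToyChain:
    def __init__(self, children: list[ToyChild]) -> None:
        self.children = children

    def put(self, obj: object, name: str) -> None:
        accepted = [child for child in self.children if child.accepts(name)]
        if not accepted:
            raise ValueError(f"dataset type {name!r} not supported by any child")
        for child in accepted:
            child.contents[name] = obj


chain = ToyChain([ToyChild({"metric"}), ToyChild({"metric", "metric33"})])
chain.put(1, "metric33")
assert [("metric33" in c.contents) for c in chain.children] == [False, True]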

1561class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase): 

1562 """Tests for datastore caching infrastructure.""" 

1563 

1564 @classmethod 

1565 def setUpClass(cls) -> None: 

1566 cls.storageClassFactory = StorageClassFactory() 

1567 cls.universe = DimensionUniverse() 

1568 

1569 # Ensure that we load the test storage class definitions. 

1570 scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml") 

1571 cls.storageClassFactory.addFromConfig(scConfigFile) 

1572 

1573 def setUp(self) -> None: 

1574 self.id = 0 

1575 

1576 # Create a root that we can use for caching tests. 

1577 self.root = tempfile.mkdtemp() 

1578 

1579 # Create some test dataset refs and associated test files 

1580 sc = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1581 dimensions = self.universe.conform(("visit", "physical_filter")) 

1582 dataId = { 

1583 "instrument": "dummy", 

1584 "visit": 52, 

1585 "physical_filter": "V", 

1586 "band": "v", 

1587 "day_obs": 20250101, 

1588 } 

1589 

1590 # Create list of refs and list of temporary files 

1591 n_datasets = 10 

1592 self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)] 

1593 

1594 root_uri = ResourcePath(self.root, forceDirectory=True) 

1595 self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)] 

1596 

1597 # Create test files. 

1598 for uri in self.files: 

1599 uri.write(b"0123456789") 

1600 

1601 # Create some composite refs with component files. 

1602 sc = self.storageClassFactory.getStorageClass("StructuredData") 

1603 self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)] 

1604 self.comp_files = [] 

1605 self.comp_refs = [] 

1606 for n, ref in enumerate(self.composite_refs): 

1607 component_refs = [] 

1608 component_files = [] 

1609 for component in sc.components: 

1610 component_ref = ref.makeComponentRef(component) 

1611 file = root_uri.join(f"composite_file-{n}-{component}.txt") 

1612 component_refs.append(component_ref) 

1613 component_files.append(file) 

1614 file.write(b"9876543210") 

1615 

1616 self.comp_files.append(component_files) 

1617 self.comp_refs.append(component_refs) 

1618 

1619 def tearDown(self) -> None: 

1620 if self.root is not None and os.path.exists(self.root): 

1621 shutil.rmtree(self.root, ignore_errors=True) 

1622 

1623 def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager: 

1624 config = Config.fromYaml(config_str) 

1625 return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe) 

1626 

1627 def testNoCacheDir(self) -> None: 

1628 config_str = """ 

1629cached: 

1630 root: null 

1631 cacheable: 

1632 metric0: true 

1633 """ 

1634 cache_manager = self._make_cache_manager(config_str) 

1635 

1636 # Look inside to check we don't have a cache directory 

1637 self.assertIsNone(cache_manager._cache_directory) 

1638 

1639 self.assertCache(cache_manager) 

1640 

1641 # Test that the cache directory is marked temporary 

1642 self.assertTrue(cache_manager.cache_directory.isTemporary) 

1643 

1644 def testNoCacheDirReversed(self) -> None: 

1645 """Use default caching status and metric1 to false""" 

1646 config_str = """ 

1647cached: 

1648 root: null 

1649 default: true 

1650 cacheable: 

1651 metric1: false 

1652 """ 

1653 cache_manager = self._make_cache_manager(config_str) 

1654 

1655 self.assertCache(cache_manager) 

1656 
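# Sketch of the decision the two tests above exercise, assuming per-dataset-type
# `cacheable` entries override the `default` flag (hypothetical helper, not the
# DatastoreCacheManager implementation):
def toy_should_cache(dataset_type: str, cacheable: dict[str, bool], default: bool = False) -> bool:
    # An explicit per-type entry wins; otherwise fall back to the default.
    return cacheable.get(dataset_type, default)


assert toy_should_cache("metric0", {"metric0": True}) is True
assert toy_should_cache("metric1", {"metric0": True}) is False
assert toy_should_cache("metric1", {"metric1": False}, default=True) is False
assert toy_should_cache("metric0", {"metric1": False}, default=True) is True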

1657 def testEnvvarCacheDir(self) -> None: 

1658 config_str = f""" 

1659cached: 

1660 root: '{self.root}' 

1661 cacheable: 

1662 metric0: true 

1663 """ 

1664 

1665 root = ResourcePath(self.root, forceDirectory=True) 

1666 env_dir = root.join("somewhere", forceDirectory=True) 

1667 elsewhere = root.join("elsewhere", forceDirectory=True) 

1668 

1669 # Environment variable should override the config value. 

1670 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}): 

1671 cache_manager = self._make_cache_manager(config_str) 

1672 self.assertEqual(cache_manager.cache_directory, env_dir) 

1673 

1674 # This environment variable should not override the config value. 

1675 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1676 cache_manager = self._make_cache_manager(config_str) 

1677 self.assertEqual(cache_manager.cache_directory, root) 

1678 

1679 # Now use a config with no cache root set.

1680 config_str = """ 

1681cached: 

1682 root: null 

1683 default: true 

1684 cacheable: 

1685 metric1: false 

1686 """ 

1687 cache_manager = self._make_cache_manager(config_str) 

1688 

1689 # This environment variable should override the config value. 

1690 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1691 cache_manager = self._make_cache_manager(config_str) 

1692 self.assertEqual(cache_manager.cache_directory, env_dir) 

1693 

1694 # If both environment variables are set the main (not IF_UNSET) 

1695 # variable should win. 

1696 with unittest.mock.patch.dict( 

1697 os.environ, 

1698 { 

1699 "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath, 

1700 "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath, 

1701 }, 

1702 ): 

1703 cache_manager = self._make_cache_manager(config_str) 

1704 self.assertEqual(cache_manager.cache_directory, env_dir) 

1705 

1706 # Use the API to set the environment variable, making sure that the 

1707 # variable is reset on exit. 

1708 with unittest.mock.patch.dict( 

1709 os.environ, 

1710 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1711 ): 

1712 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1713 self.assertTrue(defined) 

1714 cache_manager = self._make_cache_manager(config_str) 

1715 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1716 

1717 # Now create the cache manager ahead of time and set the fallback 

1718 # later. 

1719 cache_manager = self._make_cache_manager(config_str) 

1720 self.assertIsNone(cache_manager._cache_directory) 

1721 with unittest.mock.patch.dict( 

1722 os.environ, 

1723 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1724 ): 

1725 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1726 self.assertTrue(defined) 

1727 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1728 
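# The precedence just tested (DAF_BUTLER_CACHE_DIRECTORY beats the config
# root, which beats DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET) can be sketched with
# the stdlib alone; pick_cache_dir is a hypothetical helper, and os and
# unittest.mock are already imported at the top of this module.
def pick_cache_dir(config_root: str | None) -> str | None:
    if explicit := os.environ.get("DAF_BUTLER_CACHE_DIRECTORY"):
        return explicit
    if config_root is not None:
        return config_root
    return os.environ.get("DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET") or None


with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": "/env"}, clear=True):
    assert pick_cache_dir("/config") == "/env"
with unittest.mock.patch.dict(os.environ, {}, clear=True):
    assert pick_cache_dir("/config") == "/config"
    assert pick_cache_dir(None) is None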

1729 def testExplicitCacheDir(self) -> None: 

1730 config_str = f""" 

1731cached: 

1732 root: '{self.root}' 

1733 cacheable: 

1734 metric0: true 

1735 """ 

1736 cache_manager = self._make_cache_manager(config_str) 

1737 

1738 # Look inside to check we do have a cache directory. 

1739 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True)) 

1740 

1741 self.assertCache(cache_manager) 

1742 

1743 # Test that the cache directory is not marked temporary 

1744 self.assertFalse(cache_manager.cache_directory.isTemporary) 

1745 

1746 def assertCache(self, cache_manager: DatastoreCacheManager) -> None: 

1747 self.assertTrue(cache_manager.should_be_cached(self.refs[0])) 

1748 self.assertFalse(cache_manager.should_be_cached(self.refs[1])) 

1749 

1750 uri = cache_manager.move_to_cache(self.files[0], self.refs[0]) 

1751 self.assertIsInstance(uri, ResourcePath) 

1752 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1])) 

1753 

1754 # Check presence in cache using ref and then using file extension. 

1755 self.assertFalse(cache_manager.known_to_cache(self.refs[1])) 

1756 self.assertTrue(cache_manager.known_to_cache(self.refs[0])) 

1757 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension())) 

1758 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension())) 

1759 

1760 # Cached file should no longer exist but uncached file should be 

1761 # unaffected. 

1762 self.assertFalse(self.files[0].exists()) 

1763 self.assertTrue(self.files[1].exists()) 

1764 

1765 # Should find this file and it should be within the cache directory. 

1766 with cache_manager.find_in_cache(self.refs[0], ".txt") as found: 

1767 self.assertTrue(found.exists()) 

1768 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory)) 

1769 

1770 # Should not be able to find these in cache 

1771 with cache_manager.find_in_cache(self.refs[0], ".fits") as found: 

1772 self.assertIsNone(found) 

1773 with cache_manager.find_in_cache(self.refs[1], ".fits") as found: 

1774 self.assertIsNone(found) 

1775 

1776 def testNoCache(self) -> None: 

1777 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe) 

1778 for uri, ref in zip(self.files, self.refs, strict=True): 

1779 self.assertFalse(cache_manager.should_be_cached(ref)) 

1780 self.assertIsNone(cache_manager.move_to_cache(uri, ref)) 

1781 self.assertFalse(cache_manager.known_to_cache(ref)) 

1782 with cache_manager.find_in_cache(ref, ".txt") as found: 

1783 self.assertIsNone(found, msg=f"{cache_manager}") 

1784 

1785 def _expiration_config(self, mode: str, threshold: int) -> str: 

1786 return f""" 

1787cached: 

1788 default: true 

1789 expiry: 

1790 mode: {mode} 

1791 threshold: {threshold} 

1792 cacheable: 

1793 unused: true 

1794 """ 

1795 
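# A toy oldest-first eviction (stdlib only, hypothetical helper) for the
# "files" expiry mode configured above. One plausible ordering that reproduces
# the threshold + 1 counts asserted in these tests: expire existing entries
# down to the threshold, then insert the new one.
from collections import OrderedDict


def toy_move_to_cache(cache: OrderedDict, key: str, data: bytes, threshold: int) -> None:
    while len(cache) > threshold:
        cache.popitem(last=False)  # drop the oldest entry
    cache[key] = data


toy_cache: OrderedDict = OrderedDict()
for n in range(5):
    toy_move_to_cache(toy_cache, f"file{n}", b"0123456789", threshold=2)
assert list(toy_cache) == ["file2", "file3", "file4"]  # datasets 2, 3, 4 remain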

1796 def testCacheExpiryFiles(self) -> None: 

1797 threshold = 2 # Keep at least 2 files. 

1798 mode = "files" 

1799 config_str = self._expiration_config(mode, threshold) 

1800 

1801 cache_manager = self._make_cache_manager(config_str) 

1802 

1803 # Check that an empty cache returns unknown for an arbitrary ref.

1804 self.assertFalse(cache_manager.known_to_cache(self.refs[0])) 

1805 

1806 # Should end with datasets: 2, 3, 4 

1807 self.assertExpiration(cache_manager, 5, threshold + 1) 

1808 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1809 

1810 # Check that we will not expire a file that is actively in use. 

1811 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1812 self.assertIsNotNone(found) 

1813 

1814 # Trigger cache expiration that should remove the file 

1815 # we just retrieved. Should now have: 3, 4, 5 

1816 cached = cache_manager.move_to_cache(self.files[5], self.refs[5]) 

1817 self.assertIsNotNone(cached) 

1818 

1819 # Cache should still report the expected file count.

1820 self.assertEqual(cache_manager.file_count, threshold + 1) 

1821 

1822 # Add additional entry to cache. 

1823 # Should now have 4, 5, 6 

1824 cached = cache_manager.move_to_cache(self.files[6], self.refs[6]) 

1825 self.assertIsNotNone(cached) 

1826 

1827 # Is the file still there? 

1828 self.assertTrue(found.exists()) 

1829 

1830 # Can we read it? 

1831 data = found.read() 

1832 self.assertGreater(len(data), 0) 

1833 

1834 # Outside context the file should no longer exist. 

1835 self.assertFalse(found.exists()) 

1836 

1837 # File count should not have changed. 

1838 self.assertEqual(cache_manager.file_count, threshold + 1) 

1839 

1840 # Dataset 2 was in the exempt directory, but because hardlinks

1841 # are used it was deleted from the main cache during the cache

1842 # expiry above and so should no longer be found.

1843 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1844 self.assertIsNone(found) 

1845 

1846 # And the one stored after it is also gone. 

1847 with cache_manager.find_in_cache(self.refs[3], ".txt") as found: 

1848 self.assertIsNone(found) 

1849 

1850 # But dataset 4 is present. 

1851 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1852 self.assertIsNotNone(found) 

1853 

1854 # Adding a new dataset to the cache should now evict the oldest entry.

1855 cache_manager.move_to_cache(self.files[7], self.refs[7]) 

1856 

1857 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1858 self.assertIsNone(found) 

1859 

1860 def testCacheExpiryDatasets(self) -> None: 

1861 threshold = 2 # Keep 2 datasets. 

1862 mode = "datasets" 

1863 config_str = self._expiration_config(mode, threshold) 

1864 

1865 cache_manager = self._make_cache_manager(config_str) 

1866 self.assertExpiration(cache_manager, 5, threshold + 1) 

1867 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1868 

1869 def testCacheExpiryDatasetsComposite(self) -> None: 

1870 threshold = 2 # Keep 2 datasets. 

1871 mode = "datasets" 

1872 config_str = self._expiration_config(mode, threshold) 

1873 

1874 cache_manager = self._make_cache_manager(config_str) 

1875 

1876 n_datasets = 3 

1877 for i in range(n_datasets): 

1878 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i], strict=True): 

1879 cached = cache_manager.move_to_cache(component_file, component_ref) 

1880 self.assertIsNotNone(cached) 

1881 self.assertTrue(cache_manager.known_to_cache(component_ref)) 

1882 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef())) 

1883 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension())) 

1884 

1885 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files 

1886 

1887 # Write two new non-composite datasets and the number of files should drop.

1888 self.assertExpiration(cache_manager, 2, 5) 

1889 

1890 def testCacheExpirySize(self) -> None: 

1891 threshold = 55 # Each file is 10 bytes 

1892 mode = "size" 

1893 config_str = self._expiration_config(mode, threshold) 

1894 

1895 cache_manager = self._make_cache_manager(config_str) 

1896 self.assertExpiration(cache_manager, 10, 6) 

1897 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1898 

1899 def assertExpiration( 

1900 self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int 

1901 ) -> None: 

1902 """Insert the datasets and then check the number retained.""" 

1903 for i in range(n_datasets): 

1904 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1905 self.assertIsNotNone(cached) 

1906 

1907 self.assertEqual(cache_manager.file_count, n_retained) 

1908 

1909 # The oldest files should no longer be in the cache.

1910 for i in range(n_datasets): 

1911 with cache_manager.find_in_cache(self.refs[i], ".txt") as found: 

1912 if i >= n_datasets - n_retained: 

1913 self.assertIsInstance(found, ResourcePath) 

1914 else: 

1915 self.assertIsNone(found) 

1916 

1917 def testCacheExpiryAge(self) -> None: 

1918 threshold = 1 # Expire files older than 1 second.

1919 mode = "age" 

1920 config_str = self._expiration_config(mode, threshold) 

1921 

1922 cache_manager = self._make_cache_manager(config_str) 

1923 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1924 

1925 # Insert 2 files, then sleep, then insert 4 more.

1926 for i in range(2): 

1927 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1928 self.assertIsNotNone(cached) 

1929 time.sleep(2.0) 

1930 for j in range(4): 

1931 i = 2 + j # Continue the counting 

1932 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1933 self.assertIsNotNone(cached) 

1934 

1935 # Only the files written after the sleep should exist. 

1936 self.assertEqual(cache_manager.file_count, 4) 

1937 with cache_manager.find_in_cache(self.refs[1], ".txt") as found: 

1938 self.assertIsNone(found) 

1939 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1940 self.assertIsInstance(found, ResourcePath) 

1941 
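# Age-based expiry can be sketched the same way (hypothetical helper, using
# the time module already imported at the top of this module): when a new
# entry arrives, drop entries older than `threshold` seconds.
def toy_expire_by_age(cache: dict[str, float], threshold: float) -> None:
    now = time.monotonic()
    for key in [k for k, stamp in cache.items() if now - stamp > threshold]:
        del cache[key]


age_cache = {"old": time.monotonic() - 5.0, "new": time.monotonic()}
toy_expire_by_age(age_cache, threshold=1.0)
assert "old" not in age_cache and "new" in age_cache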

1942 

1943class NullDatastoreTestCase(DatasetTestHelper, unittest.TestCase): 

1944 """Test the null datastore.""" 

1945 

1946 storageClassFactory = StorageClassFactory() 

1947 

1948 def test_basics(self) -> None: 

1949 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1950 ref = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {}) 

1951 

1952 null = NullDatastore(None, None) 

1953 

1954 self.assertFalse(null.exists(ref)) 

1955 self.assertFalse(null.knows(ref)) 

1956 knows = null.knows_these([ref]) 

1957 self.assertFalse(knows[ref]) 

1958 null.validateConfiguration(ref) 

1959 

1960 with self.assertRaises(FileNotFoundError): 

1961 null.get(ref) 

1962 with self.assertRaises(NotImplementedError): 

1963 null.put("", ref) 

1964 with self.assertRaises(FileNotFoundError): 

1965 null.getURI(ref) 

1966 with self.assertRaises(FileNotFoundError): 

1967 null.getURIs(ref) 

1968 with self.assertRaises(FileNotFoundError): 

1969 null.getManyURIs([ref]) 

1970 with self.assertRaises(NotImplementedError): 

1971 null.getLookupKeys() 

1972 with self.assertRaises(NotImplementedError): 

1973 null.import_records({}) 

1974 with self.assertRaises(NotImplementedError): 

1975 null.export_records([]) 

1976 with self.assertRaises(NotImplementedError): 

1977 null.export([ref]) 

1978 with self.assertRaises(NotImplementedError): 

1979 null.transfer(null, ref) 

1980 with self.assertRaises(NotImplementedError): 

1981 null.emptyTrash() 

1982 with self.assertRaises(NotImplementedError): 

1983 null.trash(ref) 

1984 with self.assertRaises(NotImplementedError): 

1985 null.forget([ref]) 

1986 with self.assertRaises(NotImplementedError): 

1987 null.remove(ref) 

1988 with self.assertRaises(NotImplementedError): 

1989 null.retrieveArtifacts([ref], ResourcePath(".")) 

1990 with self.assertRaises(NotImplementedError): 

1991 null.transfer_from(null, [ref]) 

1992 with self.assertRaises(NotImplementedError): 

1993 null.ingest() 

1994 
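# NullDatastore above follows the null-object pattern: every existence query
# answers "no" and every operation that would need real storage raises. A
# minimal sketch of the same idea (hypothetical ToyNullStore):
class ToyNullStore:
    def exists(self, ref: object) -> bool:
        return False

    def get(self, ref: object) -> object:
        raise FileNotFoundError(f"{ref} not found in a null datastore")

    def put(self, obj: object, ref: object) -> None:
        raise NotImplementedError("null datastore cannot store datasets")


toy_null = ToyNullStore()
assert not toy_null.exists("anything")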

1995 

1996class DatasetRefURIsTestCase(unittest.TestCase): 

1997 """Tests for DatasetRefURIs.""" 

1998 

1999 def testSequenceAccess(self) -> None: 

2000 """Verify that DatasetRefURIs can be treated like a two-item tuple.""" 

2001 uris = DatasetRefURIs() 

2002 

2003 self.assertEqual(len(uris), 2) 

2004 self.assertEqual(uris[0], None) 

2005 self.assertEqual(uris[1], {}) 

2006 

2007 primaryURI = ResourcePath("1/2/3") 

2008 componentURI = ResourcePath("a/b/c") 

2009 

2010 # Affirm that DatasetRefURIs does not support MutableSequence item assignment.

2011 with self.assertRaises(TypeError): 

2012 uris[0] = primaryURI 

2013 with self.assertRaises(TypeError): 

2014 uris[1] = {"foo": componentURI} 

2015 

2016 # But the URIs can be set by property name:

2017 uris.primaryURI = primaryURI 

2018 uris.componentURIs = {"foo": componentURI} 

2019 self.assertEqual(uris.primaryURI, primaryURI) 

2020 self.assertEqual(uris[0], primaryURI) 

2021 

2022 primary, components = uris 

2023 self.assertEqual(primary, primaryURI) 

2024 self.assertEqual(components, {"foo": componentURI}) 

2025 
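# The tuple-like behaviour verified above (len, indexing, unpacking, but no
# item assignment) can be reproduced with a plain Sequence subclass; this is
# an illustrative sketch, not the DatasetRefURIs implementation.
from collections.abc import Sequence


class TwoItemView(Sequence):
    def __init__(self, first: object = None, second: dict | None = None) -> None:
        self.first = first
        self.second = second if second is not None else {}

    def __len__(self) -> int:
        return 2

    def __getitem__(self, index: int) -> object:
        return (self.first, self.second)[index]


view = TwoItemView()
primary, components = view  # unpacking works via the Sequence protocol
assert (len(view), primary, components) == (2, None, {})
try:
    view[0] = "x"  # no __setitem__ is defined, so this raises TypeError
except TypeError:
    pass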

2026 def testRepr(self) -> None: 

2027 """Verify __repr__ output.""" 

2028 uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")}) 

2029 self.assertEqual( 

2030 repr(uris), 

2031 'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})', 

2032 ) 

2033 

2034 

2035class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase): 

2036 """Test the StoredFileInfo class.""" 

2037 

2038 storageClassFactory = StorageClassFactory() 

2039 

2040 def test_StoredFileInfo(self) -> None: 

2041 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

2042 ref = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {}) 

2043 

2044 record = dict( 

2045 storage_class="StructuredDataDict", 

2046 formatter="lsst.daf.butler.Formatter", 

2047 path="a/b/c.txt", 

2048 component="component", 

2049 checksum=None, 

2050 file_size=5, 

2051 ) 

2052 info = StoredFileInfo.from_record(record) 

2053 

2054 self.assertEqual(info.to_record(), record) 

2055 

2056 ref2 = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {}) 

2057 rebased = info.rebase(ref2) 

2058 self.assertEqual(rebased.rebase(ref), info) 

2059 

2060 with self.assertRaises(TypeError): 

2061 rebased.update(formatter=42) 

2062 

2063 with self.assertRaises(ValueError): 

2064 rebased.update(something=42, new="42") 

2065 

2066 # Check that pickle works on StoredFileInfo. 

2067 pickled_info = pickle.dumps(info) 

2068 unpickled_info = pickle.loads(pickled_info) 

2069 self.assertEqual(unpickled_info, info) 

2070 
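# The record round-trip, pickling, and guarded-update behaviour tested above
# can be sketched with a frozen dataclass (hypothetical ToyFileInfo, not the
# real StoredFileInfo; pickle is already imported at the top of this module):
import dataclasses


@dataclasses.dataclass(frozen=True)
class ToyFileInfo:
    path: str
    file_size: int

    def to_record(self) -> dict:
        return dataclasses.asdict(self)

    @classmethod
    def from_record(cls, record: dict) -> ToyFileInfo:
        return cls(**record)


toy_info = ToyFileInfo.from_record({"path": "a/b/c.txt", "file_size": 5})
assert toy_info.to_record() == {"path": "a/b/c.txt", "file_size": 5}
assert pickle.loads(pickle.dumps(toy_info)) == toy_info
# dataclasses.replace is the analogue of update(); unknown fields raise TypeError.
assert dataclasses.replace(toy_info, file_size=6).file_size == 6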

2071 

2072@contextlib.contextmanager 

2073def _temp_yaml_file(data: Any) -> Iterator[str]: 

2074 fh = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") 

2075 try: 

2076 yaml.dump(data, stream=fh) 

2077 fh.flush() 

2078 yield fh.name 

2079 finally: 

2080 # Some tests delete the file 

2081 with contextlib.suppress(FileNotFoundError): 

2082 fh.close() 

2083 
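# Example use of the helper above: the YAML file exists only inside the block,
# and the suppress(FileNotFoundError) guard tolerates a test deleting the file
# early (this sketch assumes a POSIX platform, where the open temporary file
# can be reopened by name).
def _demo_temp_yaml_file() -> None:
    with _temp_yaml_file({"a": 1}) as path:
        with open(path) as stream:
            assert yaml.safe_load(stream) == {"a": 1}
        os.remove(path)  # simulate early deletion; cleanup still succeeds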

2084 

2085if __name__ == "__main__": 

2086 unittest.main()