Coverage for tests/test_datastore.py: 11%

1074 statements  


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable
from typing import Any, cast

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NullDatastore,
    StorageClass,
    StorageClassFactory,
    StoredFileInfo,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)

def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    """Make example dataset that can be stored in butler."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )

class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass

class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing"""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False
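    # Concrete subclasses below (POSIX, in-memory, chained) override these
    # flags, along with ``configFile``, ``uriScheme``, ``ingestTransferModes``,
    # and ``canIngestNoTransferAuto``, to describe the capabilities of the
    # datastore implementation under test.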

    def testConfigRoot(self) -> None:
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self) -> None:
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self) -> None:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"}
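        # ``dataId2`` is never stored; it exercises URI prediction and the
        # negative branch of ``mexists`` below.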

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V", "band": "v"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that registry knows nothing about."""
        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the datastore record; from now on only trust mode can
            # find the file
            datastore.removeStoredItemInfo(ref)

            # With trust disabled, everything should now look missing
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make new dataset ref with compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate stored dataset type.
                def _stored_dataset_type(name: str, ref: DatasetRef = ref) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self) -> None:
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore, tuple[DatasetRef, ...]]:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs

    def testRemove(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self) -> None:
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"} for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V", "band": "v"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V", "band": "v"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(
        self, func: Callable[[MetricsExample, str, DatasetRef], None], expectOutput: bool = True
    ) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files outside of datastore root unless
                    auto.
                    """
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file by transferring it to the template
                    location.
                    """
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
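        # The for/else below reaches the else branch only when the loop does
        # not break, i.e. when every child datastore is in-memory.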

        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self) -> None:
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works, include non-existing dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with dict()
        constructor.
        """
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))

class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self) -> None:
        """Test that we write to a temporary and then rename"""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self) -> None:
        """Verify that the expected exception is raised if the FileDatastore
        can not determine the put formatter location.
        """
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId)

        def raiser(ref: DatasetRef) -> None:
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)

    def test_roots(self) -> None:
        datastore = self.makeDatastore()

        self.assertEqual(set(datastore.names), set(datastore.roots.keys()))
        for root in datastore.roots.values():
            if root is not None:
                self.assertTrue(root.exists())

class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self) -> None:
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but this time with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)

    def test_repeat_ingest(self) -> None:
        """Test that repeatedly ingesting the same file in direct mode
        is allowed.

        This test can only run with FileDatastore since that is the only
        datastore supporting "direct" ingest.
        """
        metrics, v4ref = self._prepareIngestTest()
        datastore = self.makeDatastore()
        v5ref = DatasetRef(
            v4ref.datasetType, v4ref.dataId, v4ref.run, id_generation_mode=DatasetIdGenEnum.DATAID_TYPE_RUN
        )
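        # DATAID_TYPE_RUN derives a deterministic (UUIDv5) dataset ID from the
        # dataset type, data ID, and run, so a repeat direct ingest sees the
        # same ID; the default mode uses a random UUIDv4, so a repeat ingest
        # is treated as a clash.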

        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=True) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)

            datastore.ingest(FileDataset(path=path, refs=v4ref), transfer="direct")

            # This will fail because the ref is using UUIDv4.
            with self.assertRaises(RuntimeError):
                datastore.ingest(FileDataset(path=path, refs=v4ref), transfer="direct")

            # UUIDv5 can be repeatedly ingested in direct mode.
            datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="direct")
            datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="direct")

            with self.assertRaises(RuntimeError):
                datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="copy")

class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self) -> None:
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record, after which trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for _ in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed

class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Test that the datastore cleans up on failure."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self) -> None:
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails and a formatter that fails but leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(RuntimeError):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")

class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False

class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False

class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self) -> None:
        """Test constraints model. Assumes that each test class has the
        same constraints.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
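        # Each tuple below is (dataset type name, storage class, whether the
        # configured constraints should accept the dataset).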

        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))

class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False

class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if the child datastore would accept.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self) -> None:
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
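        # In each tuple below, ``accept`` holds one flag per child datastore
        # in the chain, in configuration order; ``ingest`` says whether the
        # chain as a whole should accept an ingest of the file.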

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                            # Ephemeral datastores means InMemory at the moment
                            # and that does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))

1450class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase): 

1451 """Tests for datastore caching infrastructure.""" 

1452 

1453 @classmethod 

1454 def setUpClass(cls) -> None: 

1455 cls.storageClassFactory = StorageClassFactory() 

1456 cls.universe = DimensionUniverse() 

1457 

1458 # Ensure that we load the test storage class definitions. 

1459 scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml") 

1460 cls.storageClassFactory.addFromConfig(scConfigFile) 

1461 

1462 def setUp(self) -> None: 

1463 self.id = 0 

1464 

1465 # Create a root that we can use for caching tests. 

1466 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1467 

1468 # Create some test dataset refs and associated test files 

1469 sc = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1470 dimensions = self.universe.extract(("visit", "physical_filter")) 

1471 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} 

1472 

1473 # Create list of refs and list of temporary files 

1474 n_datasets = 10 

1475 self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)] 

1476 

1477 root_uri = ResourcePath(self.root, forceDirectory=True) 

1478 self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)] 

1479 

1480 # Create test files. 

1481 for uri in self.files: 

1482 uri.write(b"0123456789") 

1483 

1484 # Create some composite refs with component files. 

1485 sc = self.storageClassFactory.getStorageClass("StructuredData") 

1486 self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)] 

1487 self.comp_files = [] 

1488 self.comp_refs = [] 

1489 for n, ref in enumerate(self.composite_refs): 

1490 component_refs = [] 

1491 component_files = [] 

1492 for component in sc.components: 

1493 component_ref = ref.makeComponentRef(component) 

1494 file = root_uri.join(f"composite_file-{n}-{component}.txt") 

1495 component_refs.append(component_ref) 

1496 component_files.append(file) 

1497 file.write(b"9876543210") 

1498 

1499 self.comp_files.append(component_files) 

1500 self.comp_refs.append(component_refs) 
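# Each composite ref now has one component ref and one 10-byte file per
# component of the StructuredData storage class, mirroring the plain
# refs/files fixtures above.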

1501 

1502 def tearDown(self) -> None: 

1503 if self.root is not None and os.path.exists(self.root): 

1504 shutil.rmtree(self.root, ignore_errors=True) 

1505 

1506 def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager: 

1507 config = Config.fromYaml(config_str) 

1508 return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe) 

1509 

1510 def testNoCacheDir(self) -> None: 

1511 config_str = """ 

1512cached: 

1513 root: null 

1514 cacheable: 

1515 metric0: true 

1516 """ 

1517 cache_manager = self._make_cache_manager(config_str) 

1518 

1519 # Look inside to check we don't have a cache directory 

1520 self.assertIsNone(cache_manager._cache_directory) 

1521 

1522 self.assertCache(cache_manager) 

1523 

1524 # Test that the cache directory is marked temporary 

1525 self.assertTrue(cache_manager.cache_directory.isTemporary) 

1526 

1527 def testNoCacheDirReversed(self) -> None: 

1528 """Use default caching status and metric1 to false""" 

1529 config_str = """ 

1530cached: 

1531 root: null 

1532 default: true 

1533 cacheable: 

1534 metric1: false 

1535 """ 

1536 cache_manager = self._make_cache_manager(config_str) 

1537 

1538 self.assertCache(cache_manager) 

1539 

1540 def testEnvvarCacheDir(self) -> None: 

1541 config_str = f""" 

1542cached: 

1543 root: '{self.root}' 

1544 cacheable: 

1545 metric0: true 

1546 """ 

1547 

1548 root = ResourcePath(self.root, forceDirectory=True) 

1549 env_dir = root.join("somewhere", forceDirectory=True) 

1550 elsewhere = root.join("elsewhere", forceDirectory=True) 

1551 

1552 # Environment variable should override the config value. 

1553 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}): 

1554 cache_manager = self._make_cache_manager(config_str) 

1555 self.assertEqual(cache_manager.cache_directory, env_dir) 

1556 

1557 # This environment variable should not override the config value. 

1558 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1559 cache_manager = self._make_cache_manager(config_str) 

1560 self.assertEqual(cache_manager.cache_directory, root) 

1561 

1562 # Now a config with no cache root set.

1563 config_str = """ 

1564cached: 

1565 root: null 

1566 default: true 

1567 cacheable: 

1568 metric1: false 

1569 """ 

1570 cache_manager = self._make_cache_manager(config_str) 

1571 

1572 # With no root set in the config, this environment variable should take effect.

1573 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1574 cache_manager = self._make_cache_manager(config_str) 

1575 self.assertEqual(cache_manager.cache_directory, env_dir) 

1576 

1577 # If both environment variables are set, the main (not IF_UNSET)

1578 # variable should win.

1579 with unittest.mock.patch.dict( 

1580 os.environ, 

1581 { 

1582 "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath, 

1583 "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath, 

1584 }, 

1585 ): 

1586 cache_manager = self._make_cache_manager(config_str) 

1587 self.assertEqual(cache_manager.cache_directory, env_dir) 

1588 

1589 # Use the API to set the environment variable, making sure that the 

1590 # variable is reset on exit. 

1591 with unittest.mock.patch.dict( 

1592 os.environ, 

1593 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1594 ): 

1595 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1596 self.assertTrue(defined) 

1597 cache_manager = self._make_cache_manager(config_str) 

1598 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1599 

1600 # Now create the cache manager ahead of time and set the fallback 

1601 # later. 

1602 cache_manager = self._make_cache_manager(config_str) 

1603 self.assertIsNone(cache_manager._cache_directory) 

1604 with unittest.mock.patch.dict( 

1605 os.environ, 

1606 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1607 ): 

1608 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1609 self.assertTrue(defined) 

1610 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 
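# The precedence exercised above, as checked by the assertions (a summary
# of this test, not an exhaustive statement of the API):
#   1. DAF_BUTLER_CACHE_DIRECTORY always overrides the config.
#   2. An explicit root in the config beats the IF_UNSET variants.
#   3. DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET, or the
#      set_fallback_cache_directory_if_unset() API, applies only when
#      root is null.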

1611 

1612 def testExplicitCacheDir(self) -> None: 

1613 config_str = f""" 

1614cached: 

1615 root: '{self.root}' 

1616 cacheable: 

1617 metric0: true 

1618 """ 

1619 cache_manager = self._make_cache_manager(config_str) 

1620 

1621 # Look inside to check we do have a cache directory. 

1622 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True)) 

1623 

1624 self.assertCache(cache_manager) 

1625 

1626 # Test that the cache directory is not marked temporary 

1627 self.assertFalse(cache_manager.cache_directory.isTemporary) 

1628 

1629 def assertCache(self, cache_manager: DatastoreCacheManager) -> None: 

1630 self.assertTrue(cache_manager.should_be_cached(self.refs[0])) 

1631 self.assertFalse(cache_manager.should_be_cached(self.refs[1])) 

1632 

1633 uri = cache_manager.move_to_cache(self.files[0], self.refs[0]) 

1634 self.assertIsInstance(uri, ResourcePath) 

1635 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1])) 

1636 

1637 # Check presence in cache using ref and then using file extension. 

1638 self.assertFalse(cache_manager.known_to_cache(self.refs[1])) 

1639 self.assertTrue(cache_manager.known_to_cache(self.refs[0])) 

1640 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension())) 

1641 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension())) 

1642 

1643 # Cached file should no longer exist but uncached file should be 

1644 # unaffected. 

1645 self.assertFalse(self.files[0].exists()) 

1646 self.assertTrue(self.files[1].exists()) 

1647 

1648 # Should find this file and it should be within the cache directory. 

1649 with cache_manager.find_in_cache(self.refs[0], ".txt") as found: 

1650 self.assertTrue(found.exists()) 

1651 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory)) 

1652 

1653 # Should not be able to find these in cache 

1654 with cache_manager.find_in_cache(self.refs[0], ".fits") as found: 

1655 self.assertIsNone(found) 

1656 with cache_manager.find_in_cache(self.refs[1], ".fits") as found: 

1657 self.assertIsNone(found) 

1658 

1659 def testNoCache(self) -> None: 

1660 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe) 

1661 for uri, ref in zip(self.files, self.refs, strict=True): 

1662 self.assertFalse(cache_manager.should_be_cached(ref)) 

1663 self.assertIsNone(cache_manager.move_to_cache(uri, ref)) 

1664 self.assertFalse(cache_manager.known_to_cache(ref)) 

1665 with cache_manager.find_in_cache(ref, ".txt") as found: 

1666 self.assertIsNone(found, msg=f"{cache_manager}") 

1667 

1668 def _expiration_config(self, mode: str, threshold: int) -> str: 

1669 return f""" 

1670cached: 

1671 default: true 

1672 expiry: 

1673 mode: {mode} 

1674 threshold: {threshold} 

1675 cacheable: 

1676 unused: true 

1677 """ 

1678 

1679 def testCacheExpiryFiles(self) -> None: 

1680 threshold = 2 # Keep at least 2 files. 

1681 mode = "files" 

1682 config_str = self._expiration_config(mode, threshold) 

1683 

1684 cache_manager = self._make_cache_manager(config_str) 

1685 

1686 # Check that an empty cache returns unknown for an arbitrary ref.

1687 self.assertFalse(cache_manager.known_to_cache(self.refs[0])) 

1688 

1689 # Should end with datasets: 2, 3, 4 

1690 self.assertExpiration(cache_manager, 5, threshold + 1) 

1691 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1692 

1693 # Check that we will not expire a file that is actively in use. 

1694 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1695 self.assertIsNotNone(found) 

1696 

1697 # Trigger cache expiration that should remove the file 

1698 # we just retrieved. Should now have: 3, 4, 5 

1699 cached = cache_manager.move_to_cache(self.files[5], self.refs[5]) 

1700 self.assertIsNotNone(cached) 

1701 

1702 # Cache should still report the same file count.

1703 self.assertEqual(cache_manager.file_count, threshold + 1) 

1704 

1705 # Add an additional entry to the cache.

1706 # Should now have 4, 5, 6 

1707 cached = cache_manager.move_to_cache(self.files[6], self.refs[6]) 

1708 self.assertIsNotNone(cached) 

1709 

1710 # Is the file still there? 

1711 self.assertTrue(found.exists()) 

1712 

1713 # Can we read it? 

1714 data = found.read() 

1715 self.assertGreater(len(data), 0) 

1716 

1717 # Outside the context manager the file should no longer exist.

1718 self.assertFalse(found.exists()) 

1719 

1720 # File count should not have changed. 

1721 self.assertEqual(cache_manager.file_count, threshold + 1) 

1722 

1723 # Dataset 2 was in the exempt directory but, because hardlinks

1724 # are used, it was deleted from the main cache during cache expiry

1725 # above and so should no longer be found.

1726 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1727 self.assertIsNone(found) 

1728 

1729 # And the one stored after it is also gone. 

1730 with cache_manager.find_in_cache(self.refs[3], ".txt") as found: 

1731 self.assertIsNone(found) 

1732 

1733 # But dataset 4 is present. 

1734 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1735 self.assertIsNotNone(found) 

1736 

1737 # Adding a new dataset to the cache should now delete it. 

1738 cache_manager.move_to_cache(self.files[7], self.refs[7]) 

1739 

1740 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1741 self.assertIsNone(found) 

1742 

1743 def testCacheExpiryDatasets(self) -> None: 

1744 threshold = 2 # Keep 2 datasets. 

1745 mode = "datasets" 

1746 config_str = self._expiration_config(mode, threshold) 

1747 

1748 cache_manager = self._make_cache_manager(config_str) 

1749 self.assertExpiration(cache_manager, 5, threshold + 1) 

1750 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1751 

1752 def testCacheExpiryDatasetsComposite(self) -> None: 

1753 threshold = 2 # Keep 2 datasets. 

1754 mode = "datasets" 

1755 config_str = self._expiration_config(mode, threshold) 

1756 

1757 cache_manager = self._make_cache_manager(config_str) 

1758 

1759 n_datasets = 3 

1760 for i in range(n_datasets): 

1761 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i], strict=True): 

1762 cached = cache_manager.move_to_cache(component_file, component_ref) 

1763 self.assertIsNotNone(cached) 

1764 self.assertTrue(cache_manager.known_to_cache(component_ref)) 

1765 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef())) 

1766 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension())) 

1767 

1768 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files 
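# In "datasets" mode the threshold counts dataset IDs rather than files:
# the component files of a composite share one dataset ID, so the six
# retained files correspond to two retained datasets.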

1769 

1770 # Write two new non-composite datasets; the file count should drop.

1771 self.assertExpiration(cache_manager, 2, 5) 

1772 

1773 def testCacheExpirySize(self) -> None: 

1774 threshold = 55 # Each file is 10 bytes 

1775 mode = "size" 

1776 config_str = self._expiration_config(mode, threshold) 

1777 

1778 cache_manager = self._make_cache_manager(config_str) 

1779 self.assertExpiration(cache_manager, 10, 6) 

1780 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1781 

1782 def assertExpiration( 

1783 self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int 

1784 ) -> None: 

1785 """Insert the datasets and then check the number retained.""" 

1786 for i in range(n_datasets): 

1787 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1788 self.assertIsNotNone(cached) 

1789 

1790 self.assertEqual(cache_manager.file_count, n_retained) 

1791 

1792 # The oldest files should no longer be in the cache.

1793 for i in range(n_datasets): 

1794 with cache_manager.find_in_cache(self.refs[i], ".txt") as found: 

1795 if i >= n_datasets - n_retained: 

1796 self.assertIsInstance(found, ResourcePath) 

1797 else: 

1798 self.assertIsNone(found) 
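# For example, assertExpiration(cache_manager, 5, 3) expects refs 0 and 1
# to have expired and refs 2, 3 and 4 to survive.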

1799 

1800 def testCacheExpiryAge(self) -> None: 

1801 threshold = 1 # Expire files older than 1 second.

1802 mode = "age" 

1803 config_str = self._expiration_config(mode, threshold) 

1804 

1805 cache_manager = self._make_cache_manager(config_str) 

1806 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1807 

1808 # Insert 2 files, then sleep, then insert 4 more.

1809 for i in range(2): 

1810 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1811 self.assertIsNotNone(cached) 

1812 time.sleep(2.0) 

1813 for j in range(4): 

1814 i = 2 + j # Continue the counting 

1815 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1816 self.assertIsNotNone(cached) 

1817 

1818 # Only the files written after the sleep should exist. 

1819 self.assertEqual(cache_manager.file_count, 4) 

1820 with cache_manager.find_in_cache(self.refs[1], ".txt") as found: 

1821 self.assertIsNone(found) 

1822 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1823 self.assertIsInstance(found, ResourcePath) 

1824 

1825 

1826class NullDatastoreTestCase(DatasetTestHelper, unittest.TestCase): 

1827 """Test the null datastore.""" 

1828 

1829 storageClassFactory = StorageClassFactory() 

1830 

1831 def test_basics(self) -> None: 

1832 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1833 ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}) 

1834 

1835 null = NullDatastore(None, None) 
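# NullDatastore accepts placeholder constructor arguments (both None
# here); every operation below either reports the dataset as absent or
# raises NotImplementedError.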

1836 

1837 self.assertFalse(null.exists(ref)) 

1838 self.assertFalse(null.knows(ref)) 

1839 knows = null.knows_these([ref]) 

1840 self.assertFalse(knows[ref]) 

1841 null.validateConfiguration(ref) 

1842 

1843 with self.assertRaises(FileNotFoundError): 

1844 null.get(ref) 

1845 with self.assertRaises(NotImplementedError): 

1846 null.put("", ref) 

1847 with self.assertRaises(FileNotFoundError): 

1848 null.getURI(ref) 

1849 with self.assertRaises(FileNotFoundError): 

1850 null.getURIs(ref) 

1851 with self.assertRaises(FileNotFoundError): 

1852 null.getManyURIs([ref]) 

1853 with self.assertRaises(NotImplementedError): 

1854 null.getLookupKeys() 

1855 with self.assertRaises(NotImplementedError): 

1856 null.import_records({}) 

1857 with self.assertRaises(NotImplementedError): 

1858 null.export_records([]) 

1859 with self.assertRaises(NotImplementedError): 

1860 null.export([ref]) 

1861 with self.assertRaises(NotImplementedError): 

1862 null.transfer(null, ref) 

1863 with self.assertRaises(NotImplementedError): 

1864 null.emptyTrash() 

1865 with self.assertRaises(NotImplementedError): 

1866 null.trash(ref) 

1867 with self.assertRaises(NotImplementedError): 

1868 null.forget([ref]) 

1869 with self.assertRaises(NotImplementedError): 

1870 null.remove(ref) 

1871 with self.assertRaises(NotImplementedError): 

1872 null.retrieveArtifacts([ref], ResourcePath(".")) 

1873 with self.assertRaises(NotImplementedError): 

1874 null.transfer_from(null, [ref]) 

1875 with self.assertRaises(NotImplementedError): 

1876 null.ingest() 

1877 

1878 

1879class DatasetRefURIsTestCase(unittest.TestCase): 

1880 """Tests for DatasetRefURIs.""" 

1881 

1882 def testSequenceAccess(self) -> None: 

1883 """Verify that DatasetRefURIs can be treated like a two-item tuple.""" 

1884 uris = DatasetRefURIs() 

1885 

1886 self.assertEqual(len(uris), 2) 

1887 self.assertEqual(uris[0], None) 

1888 self.assertEqual(uris[1], {}) 

1889 

1890 primaryURI = ResourcePath("1/2/3") 

1891 componentURI = ResourcePath("a/b/c") 

1892 

1893 # Affirm that DatasetRefURIs does not support MutableSequence item assignment.

1894 with self.assertRaises(TypeError): 

1895 uris[0] = primaryURI 

1896 with self.assertRaises(TypeError): 

1897 uris[1] = {"foo": componentURI} 

1898 

1899 # But the URIs can be set by property name:

1900 uris.primaryURI = primaryURI 

1901 uris.componentURIs = {"foo": componentURI} 

1902 self.assertEqual(uris.primaryURI, primaryURI) 

1903 self.assertEqual(uris[0], primaryURI) 

1904 

1905 primary, components = uris 

1906 self.assertEqual(primary, primaryURI) 

1907 self.assertEqual(components, {"foo": componentURI}) 
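# A usage sketch: because of this tuple behaviour, calling code of the
# form
#
#     primary, components = datastore.getURIs(ref)
#
# keeps working when getURIs returns a DatasetRefURIs (``datastore`` and
# ``ref`` stand here for any datastore and dataset ref).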

1908 

1909 def testRepr(self) -> None: 

1910 """Verify __repr__ output.""" 

1911 uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")}) 

1912 self.assertEqual( 

1913 repr(uris), 

1914 'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})', 

1915 ) 

1916 

1917 

1918class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase): 

1919 """Test the StoredFileInfo class.""" 

1920 

1921 storageClassFactory = StorageClassFactory() 

1922 

1923 def test_StoredFileInfo(self) -> None: 

1924 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1925 ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}) 

1926 

1927 record = dict( 

1928 storage_class="StructuredDataDict", 

1929 formatter="lsst.daf.butler.Formatter", 

1930 path="a/b/c.txt", 

1931 component="component", 

1932 dataset_id=ref.id, 

1933 checksum=None, 

1934 file_size=5, 

1935 ) 

1936 info = StoredFileInfo.from_record(record) 

1937 

1938 self.assertEqual(info.dataset_id, ref.id) 

1939 self.assertEqual(info.to_record(), record) 

1940 

1941 ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}) 
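# rebase() should attach the same stored-file information to a different
# dataset ref, changing only the dataset_id; both directions of the round
# trip are verified below.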

1942 rebased = info.rebase(ref2) 

1943 self.assertEqual(rebased.dataset_id, ref2.id) 

1944 self.assertEqual(rebased.rebase(ref), info) 

1945 

1946 with self.assertRaises(TypeError): 

1947 rebased.update(formatter=42) 

1948 

1949 with self.assertRaises(ValueError): 

1950 rebased.update(something=42, new="42") 

1951 

1952 # Check that pickle works on StoredFileInfo. 

1953 pickled_info = pickle.dumps(info) 

1954 unpickled_info = pickle.loads(pickled_info) 

1955 self.assertEqual(unpickled_info, info) 

1956 

1957 

1958if __name__ == "__main__": 

1959 unittest.main()