# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os
import unittest
import shutil
import yaml
import tempfile
import time
from dataclasses import dataclass
import lsst.utils.tests

from lsst.utils import doImport

from lsst.daf.butler import StorageClassFactory, StorageClass, DimensionUniverse, FileDataset
from lsst.daf.butler import DatastoreConfig, DatasetTypeNotSupportedError, DatastoreValidationError
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler import (DatastoreCacheManager, DatastoreDisabledCacheManager,
                             DatastoreCacheManagerConfig, Config, ButlerURI, NamedKeyDict)

from lsst.daf.butler.tests import (DatasetTestHelper, DatastoreTestHelper, BadWriteFormatter,
                                   BadNoWriteFormatter, MetricsExample, DummyRegistry)


TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none=False):
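    """Return an example MetricsExample, optionally with a `None` array
    component so that reading `None` back can be exercised."""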

    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          array,
                          )


@dataclass(frozen=True)
class Named:
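    """Minimal named key for FakeDataCoordinate below; NamedKeyDict keys
    are expected to provide a ``name`` attribute."""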

    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
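        """Hash the frozen contents so instances can be used as dict keys."""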

        return hash(frozenset(self.items()))


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis that
    might otherwise occur when a standard exception is used.
    """
    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing"""
    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
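        """Test that the datastore root can be relocated in the config."""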

        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredData",
                                     "StructuredDataJson",
                                     "StructuredDataPickle")]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            print("Using storageClass: {}".format(sc.name))
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        comp = "data"
        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing about."""

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"metric{i}"

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))
            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the datastore record of the dataset so that only
            # trust mode will be able to find it
            datastore.removeStoredItemInfo(ref)

            # While trust is still disabled, everything should now fail
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     "StructuredCompositeReadComp",
                                     "StructuredData",  # No disassembly
                                     "StructuredCompositeReadCompNoDisassembly",
                                     )]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId,
                                          conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def testRegistryCompositePutGet(self):
        """Test the case where registry disassembles and puts to datastore."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     )]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print("Using storageClass: {}".format(sc.name))
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId,
                                      conform=False)

            components = sc.delegate().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                compRef = self.makeDatasetRef(ref.datasetType.componentTypeName(compName), dimensions,
                                              components[compName].storageClass, dataId,
                                              conform=False)

                print("Writing component {} with {}".format(compName, compRef.datasetType.storageClass.name))
                datastore.put(compInfo.component, compRef)

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # We can generate identical files for each storage class
                # so remove the component here
                datastore.remove(compRef)

            # Combine all the components we read back into a new composite
            metricsOut = sc.delegate().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)

    def prepDeleteTest(self, n_refs=1):
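        """Put ``n_refs`` example datasets and return the datastore
        followed by the individual refs."""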

        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638 + i,
                                                   "physical_filter": "U"})
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs

    def testRemove(self):
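        """Test that a removed dataset can be neither retrieved nor
        removed a second time."""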

        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
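        """Test that forgetting a dataset drops the datastore's record
        while leaving the file itself in place."""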

        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
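        """Test transferring a dataset from one datastore to another."""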

        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
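        """Test that datasets put inside a failed transaction are rolled
        back while those from a successful transaction remain."""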

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [(self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                 makeExampleMetrics(),)
                for dataId in dataIds]
        succeed = data[:nDatasets//2]
        fail = data[nDatasets//2:]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
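        """Test that an error in an outer transaction also rolls back an
        inner transaction that succeeded."""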

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                        conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                               conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                                   conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
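        """Create example metrics and a matching dataset ref for the
        ingest tests."""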

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
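        """Write the example metrics to a temporary YAML file and call
        ``func(metrics, path, ref)`` on it."""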

        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, 'w') as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):

            # Some datastores have "auto" but can't do in-place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(FileDataset(path="this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(FileDataset(path="../this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink-to-symlink ingest."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, 'w') as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop
                    # since it will get the same file name in the store
                    datastore.remove(ref)


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""
    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                  conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove the dataset and put it back with checksums enabled
        # explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase, unittest.TestCase):
    """Restrict trash test to FileDatastore."""
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testTrash(self):
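        """Test trashing and emptying the trash, with and without trust
        mode."""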

        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record; trash should then do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml",
                         f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails and a formatter that fails but
        # leaves a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter,
                                                             overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(),
                                f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter,
                                                     overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (("metric", sc1, True), ("metric2", sc1, False),
                                              ("metric33", sc1, True), ("metric2", sc2, True)):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore and constraints
    at the ChainedDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (("metric", dataId1, sc1, (False, True, False), True),
                                                     ("metric2", dataId1, sc1, (False, False, False), False),
                                                     ("metric2", dataId2, sc1, (True, False, False), False),
                                                     ("metric33", dataId2, sc2, (True, True, False), True),
                                                     ("metric2", dataId1, sc2, (False, True, False), True)):

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId,
                                          conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(childDatastore.exists(ref), expected,
                                         f"Testing presence of {ref} in datastore {childDatastore.name}")

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores mean InMemory at the
                            # moment, and that does not accept ingest of
                            # files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(childDatastore.exists(ref), expected,
                                             f"Testing presence of ingested {ref} in datastore"
                                             f" {childDatastore.name}")

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId,
                                         conform=False) for n in range(n_datasets)]

        root_uri = ButlerURI(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId,
                                                   conform=False) for n in range(3)]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
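        """Construct a cache manager from a YAML configuration string."""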

        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
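        """Test that a null root falls back to a temporary cache
        directory."""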

        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use default caching status and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testExplicitCacheDir(self):
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory,
                         ButlerURI(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # The cached file should no longer exist but the uncached file
        # should be unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
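        """Return a YAML cache configuration with the given expiry mode
        and threshold."""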

        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """

    def testCacheExpiryFiles(self):
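        """Test file-count expiry, including that a file open in a
        ``find_in_cache`` context is not expired until the context
        exits."""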

        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        # Should end with datasets: 2, 3, 4
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add additional entry to cache.
            # Should now have 4, 5, 6
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside the context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now delete it.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

    def testCacheExpiryDatasets(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self):
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files

        # Write two new non-composite datasets and the number of files
        # should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self):
        threshold = 55  # Each file is 10 bytes
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(self, cache_manager, n_datasets, n_retained):
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should not be in the cache any more.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ButlerURI)
                else:
                    self.assertIsNone(found)

    def testCacheExpiryAge(self):
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep, then insert more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ButlerURI)


if __name__ == "__main__":
    unittest.main()