Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

import os
import shutil
import tempfile
import time
import unittest

import yaml

import lsst.utils.tests
from lsst.utils import doImport

from lsst.daf.butler import (
    ButlerURI,
    Config,
    DatasetTypeNotSupportedError,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
)

40 

41 

42TESTDIR = os.path.dirname(__file__) 

43 

44 

def makeExampleMetrics(use_none=False):
    """Return a simple `MetricsExample` for use as test data.

    Parameters
    ----------
    use_none : `bool`, optional
        If `True` the ``data`` component is set to `None` rather than a
        list of numbers, to exercise round-tripping of missing components.

    Returns
    -------
    metrics : `MetricsExample`
        Example metrics with fixed summary and output mappings.
    """
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample({"AM1": 5.2, "AM2": 30.6},
                          {"a": [1, 2, 3],
                           "b": {"blue": 5, "red": "green"}},
                          array,
                          )

55 

56 

class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosing
    that might otherwise occur when a standard exception is used.
    """
    pass

62 

63 

class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing.

    Subclasses must define ``configFile`` pointing at the datastore
    configuration to test.
    """

    # Working directory for the test; removed in tearDown when set.
    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        # Helper from DatastoreTestHelper wires up a dummy registry.
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

88 

89 

class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    # Whether this datastore is expected to reject unsupported dataset types.
    hasUnsupportedPut = True

    def testConfigRoot(self):
        """Check that setConfigRoot rewrites the root keys in the config."""
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        """Check basic construction and ephemerality flag."""
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        """Check configuration validation of storage classes and refs."""
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        # Unknown read parameters must be rejected.
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        """Round-trip datasets through put/get for several storage classes."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredData",
                                     "StructuredDataJson",
                                     "StructuredDataPickle")]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            print("Using storageClass: {}".format(sc.name))
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

        # Remember the last storage class used for the failure tests below.
        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId, id=10000)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that registry knows nothing about.
        """

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(("StructuredData", "StructuredComposite"), start=1):
            datasetTypeName = f"metric{i}"

            # Composite storage classes are disassembled on put.
            disassembled = sc_name == "StructuredComposite"

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))
            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete registry entry so now we are trusting
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            comp = "data"
            compRef = ref.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     "StructuredCompositeReadComp",
                                     "StructuredData",  # No disassembly
                                     "StructuredCompositeReadCompNoDisassembly",
                                     )]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of tests because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId,
                                          conform=False)

                disassembled = sc.name not in {"StructuredData",
                                               "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def testRegistryCompositePutGet(self):
        """Tests the case where registry disassembles and puts to datastore.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        # of composites
        storageClasses = [self.storageClassFactory.getStorageClass(sc)
                          for sc in ("StructuredComposite",
                                     "StructuredCompositeTestA",
                                     "StructuredCompositeTestB",
                                     )]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for sc in storageClasses:
            print("Using storageClass: {}".format(sc.name))
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId,
                                      conform=False)

            components = sc.delegate().disassemble(metrics)
            self.assertTrue(components)

            compsRead = {}
            for compName, compInfo in components.items():
                compRef = self.makeDatasetRef(ref.datasetType.componentTypeName(compName), dimensions,
                                              components[compName].storageClass, dataId,
                                              conform=False)

                print("Writing component {} with {}".format(compName, compRef.datasetType.storageClass.name))
                datastore.put(compInfo.component, compRef)

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                compsRead[compName] = datastore.get(compRef)

                # We can generate identical files for each storage class
                # so remove the component here
                datastore.remove(compRef)

            # combine all the components we read back into a new composite
            metricsOut = sc.delegate().assemble(compsRead)
            self.assertEqual(metrics, metricsOut)

    def prepDeleteTest(self):
        """Put a dataset and return the datastore and its ref for
        deletion tests.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 638, "physical_filter": "U"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.put(metrics, ref)

        # Does it exist?
        self.assertTrue(datastore.exists(ref))

        # Get
        metricsOut = datastore.get(ref)
        self.assertEqual(metrics, metricsOut)

        return datastore, ref

    def testRemove(self):
        """Check that remove deletes the artifact and the record."""
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        """Check that forget drops the record but leaves the artifact."""
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        """Check transfer of a dataset between two datastores."""
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        """Check commit and rollback of a datastore transaction."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [{"instrument": "dummy", "visit": i, "physical_filter": "V"} for i in range(nDatasets)]
        data = [(self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                 makeExampleMetrics(),)
                for dataId in dataIds]
        succeed = data[:nDatasets//2]
        fail = data[nDatasets//2:]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        """Check that a failure rolls back nested transactions together."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                        conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                               conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                                   conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        """Create example metrics and a matching ref for ingest tests."""
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        """Write example metrics to a temporary YAML file and pass the
        file to ``func`` along with the metrics and a matching ref.
        """
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exist
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, 'w') as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer.
        """
        for mode in (None, "auto"):

            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(FileDataset(path="this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref),
                                             transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them.
        """
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(FileDataset(path="../this-file-does-not-exist.yaml", refs=ref),
                                         transfer=mode)
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failOutputExists(obj, path, ref):
                    """Can't ingest files if transfer destination already
                    exists."""
                    with self.assertRaises(FileExistsError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                    self.runIngestTest(failOutputExists)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, 'w') as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Cleanup the file for next time round loop
                    # since it will get the same file name in store
                    datastore.remove(ref)

737 

738 

class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

753 

754 

class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId,
                                  conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)

783 

784 

class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Check that failed writes do not leave partial files behind."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write does cleanup a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml",
                         f"Is there a file extension in {expectedUri}")

        # Try formatter that fails and formatter that fails and leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):

                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter,
                                                             overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory (avoid shadowing the
                # builtin ``dir`` here)
                uriDir = expectedUri.dirname()
                self.assertTrue(uriDir.exists(),
                                f"Check for existence of directory {uriDir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter,
                                                     overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")

841 

842 

class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization of the shared datastore tests."""
    # The following class attributes parameterize the shared test methods
    # defined in DatastoreTests (declared elsewhere in this file).
    uriScheme = "mem"
    hasUnsupportedPut = False
    # In-memory datastores do not support file-based ingest transfers.
    ingestTransferModes = ()
    isEphemeral = True
    # No root-directory keys exist in the in-memory configuration.
    rootKeys = None
    validationCanFail = False

852 

853 

class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    # The following class attributes parameterize the shared test methods
    # inherited via PosixDatastoreTestCase.
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    # Config keys that point at the roots of the child datastores.
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True

863 

864 

class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    # Unlike the parent class defaults, validation is not expected to fail
    # for this configuration.
    validationCanFail = False

869 

870 

class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model.  Assumes that each test class has the
        same constraints."""
        example = makeExampleMetrics()
        datastore = self.makeDatastore()

        scStructured = self.storageClassFactory.getStorageClass("StructuredData")
        scJson = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        yamlTempFile = tempfile.NamedTemporaryFile(suffix=".yaml")
        jsonTempFile = tempfile.NamedTemporaryFile(suffix=".json")

        # Each case: dataset type name, storage class, and whether the
        # constraints configuration should accept it.
        cases = (
            ("metric", scStructured, True),
            ("metric2", scStructured, False),
            ("metric33", scStructured, True),
            ("metric2", scJson, True),
        )
        for datasetTypeName, sc, accepted in cases:
            # Choose different temp file depending on StorageClass
            if sc.name.endswith("Json"):
                testfile = jsonTempFile
            else:
                testfile = yamlTempFile

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    # Accepted dataset types must round-trip through put,
                    # existence check, and removal.
                    datastore.put(example, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    # Rejected dataset types must raise on put and leave no
                    # trace in the datastore.
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(example, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))

915 

916 

class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    # POSIX datastores support file ingest in the shared constraint tests.
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

926 

927 

class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""
    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    # In-memory datastores do not support file ingest, so the shared
    # constraint tests skip their ingest checks.
    canIngest = False

932 

933 

class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore level."""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")

938 

939 

class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")

943 

944 

class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""
    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    # An all-in-memory chain cannot ingest files.
    canIngest = False

949 

950 

class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if child datastore would accept."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        # Each case: dataset type name, dataId, storage class, a tuple of
        # expected acceptance flags (one per child datastore in the chain),
        # and whether the chain as a whole should accept an ingest.
        for typeName, dataId, sc, accept, ingest in (("metric", dataId1, sc1, (False, True, False), True),
                                                     ("metric2", dataId1, sc1, (False, False, False), False),
                                                     ("metric2", dataId2, sc1, (True, False, False), False),
                                                     ("metric33", dataId2, sc2, (True, True, False), True),
                                                     ("metric2", dataId1, sc2, (False, True, False), True)):

            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId,
                                          conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(childDatastore.exists(ref), expected,
                                         f"Testing presence of {ref} in datastore {childDatastore.name}")

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores means InMemory at the moment
                            # and that does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(childDatastore.exists(ref), expected,
                                             f"Testing presence of ingested {ref} in datastore"
                                             f" {childDatastore.name}")

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    # No child accepts: put must fail and leave nothing behind.
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))

1029 

1030 

class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        # Shared factories built once for every test in this case.
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        # Counter used by the DatasetTestHelper mixin when making refs.
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V"}

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId,
                                         conform=False) for n in range(n_datasets)]

        root_uri = ButlerURI(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId,
                                                   conform=False) for n in range(3)]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        # Remove the temporary root created in setUp.
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        """Construct a cache manager from a YAML configuration string."""
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

    def testNoCacheDir(self):
        """Cache manager with no explicit cache directory configured."""
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
  """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self):
        """Use default caching status and metric1 to false"""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
  """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testExplicitCacheDir(self):
        """Cache manager with an explicitly-configured cache directory."""
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
  """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory,
                         ButlerURI(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager):
        """Check basic cache behavior for a configuration in which refs[0]
        is cacheable and refs[1] is not."""
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ButlerURI)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self):
        """A disabled cache manager should never cache or find anything."""
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        """Return a YAML configuration string enabling cache expiry with the
        given mode and threshold."""
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
  """

    def testCacheExpiryFiles(self):
        """Expiry by file count, including a file that is actively in use."""
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        # Should end with datasets: 2, 3, 4
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add additional entry to cache.
            # Should now have 4, 5, 6
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now delete it.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

    def testCacheExpiryDatasets(self):
        """Expiry by number of datasets."""
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def testCacheExpiryDatasetsComposite(self):
        """Expiry by number of datasets when datasets have multiple
        component files."""
        threshold = 2  # Keep 2 datasets.
        mode = "datasets"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        n_datasets = 3
        for i in range(n_datasets):
            for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]):
                cached = cache_manager.move_to_cache(component_file, component_ref)
                self.assertIsNotNone(cached)
        self.assertEqual(cache_manager.file_count, 6)  # 2 datasets each of 3 files

        # Write two new non-composite and the number of files should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self):
        """Expiry by total cache size in bytes."""
        threshold = 55  # Each file is 10 bytes
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(self, cache_manager, n_datasets, n_retained):
        """Insert the datasets and then check the number retained."""
        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest file should not be in the cache any more.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ButlerURI)
                else:
                    self.assertIsNone(found)

    def testCacheExpiryAge(self):
        """Expiry of cached files by age."""
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep, then insert 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ButlerURI)

1319 

1320 

# Allow the tests to be run directly as a script.
if __name__ == "__main__":
    unittest.main()