Coverage for tests/test_datastore.py: 11% (1015 statements; coverage.py v7.2.7, created at 2023-07-12 10:56 -0700)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable
from typing import Any, cast

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    StorageClass,
    StorageClassFactory,
    StoredFileInfo,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    """Make example dataset that can be stored in butler."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
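
# A minimal sketch of how these example metrics round-trip through a
# datastore, assuming a configured `datastore` and a valid `ref` (both are
# constructed by the test helpers below):
#
#     metrics = makeExampleMetrics()
#     datastore.put(metrics, ref)
#     assert datastore.get(ref) == metrics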


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing."""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage classes are fixed for all datastores in these tests.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself).
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
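
# Note for subclass authors: each concrete test case supplies a `configFile`
# class attribute, and the datastore class itself is imported from the "cls"
# key of that configuration (see setUpClass above), so no datastore
# constructor is hard-coded in this module.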


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False

    def testConfigRoot(self) -> None:
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])
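
    # `setConfigRoot` is expected to rewrite each root-dependent config key
    # named in `rootKeys` (for example "root" for a POSIX datastore, or the
    # per-child ".datastores.N.root" keys for a chained datastore, as set by
    # the subclasses below) so that it points at the new location; the
    # assertions above verify that rewrite.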

    def testConstructor(self) -> None:
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that parameters are validated."""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self) -> None:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with the parent ID.
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None.
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V", "band": "v"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported.
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise.
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # Non-existing file.
            datastore.get(ref)

        # Get a URI from it.
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that registry knows nothing about."""
        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined.
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(
            ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"), start=1
        ):
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            # Only the composite storage class disassembles on put.
            disassembled = sc_name == "StructuredComposite"

            # Start each iteration with the datastore back in its default
            # configuration, consulting its internal records.
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly.
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s).
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the internal record so the datastore no longer knows
            # about the stored file.
            datastore.removeStoredItemInfo(ref)

            # With trust disabled, everything should now report the dataset
            # as missing.
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI lookup should fail unless we ask for prediction.
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode.
            datastore.trustGetRequest = True

            # Try again to get it.
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component.
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- in trust mode this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make a new dataset ref with a compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with the original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self) -> None:
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # The in-memory datastore does not disassemble.
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble, to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore.
        datastore = self.makeDatastore()

        # Dummy dataId.
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures caused by file clashes.
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with a read parameter.
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component.
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes, attempt to access a
                # read-only component.
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore, tuple[DatasetRef, ...]]:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
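
    # The return value is flattened, so callers unpack it directly, e.g.
    # `datastore, ref = self.prepDeleteTest()` for a single ref, or
    # `datastore, *refs = self.prepDeleteTest(n_refs=10)` for several
    # (see testRemove and testTrash below).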

    def testRemove(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail.
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once.
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail.
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op.
        datastore.forget([ref])

        # The predicted URI should still point to the file.
        self.assertTrue(uri.exists())
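
    # Deletion semantics exercised by these tests: `remove` deletes both the
    # artifact and the datastore's record of it; `forget` drops only the
    # record and leaves the file on disk; `trash`/`emptyTrash` (see
    # TrashDatastoreTestCase below) defer the actual deletion.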

    def testTransfer(self) -> None:
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"}
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist.
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not.
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist.
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist.
        for ref, _ in fail:
            # These should raise.
            with self.assertRaises(FileNotFoundError):
                # Non-existing file.
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V", "band": "v"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V", "band": "v"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist.
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist.
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone.
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(
        self, func: Callable[[MetricsExample, str, DatasetRef], None], expectOutput: bool = True
    ) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False, but then the new symlink will fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have "auto" but cannot do an in-place transfer.
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Ingest a file already in the datastore root."""
                    # First move it into the root, and adjust the path
                    # accordingly.
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files outside of the datastore root unless
                    mode is auto.
                    """
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Ingest a file by transferring it to the template
                    location.
                    """
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # the datastore, for auto mode.
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of
                    # mode.
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store.
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self) -> None:
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete
            # set.
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works; include a non-existing
                # dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import a relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None.
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with a dict()
        constructor.
        """
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))
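
    # The "A" and "B" suffixes select two variants of the same storage class
    # (e.g. "MetricsExampleA" and "MetricsExampleB"), assumed to be defined
    # with different formatters in config/basic/storageClasses.yaml; the
    # final assertion checks that both formulations round-trip to equal
    # objects.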

952 

953 

954class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase): 

955 """PosixDatastore specialization""" 

956 

957 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

958 uriScheme = "file" 

959 canIngestNoTransferAuto = True 

960 ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto") 

961 isEphemeral = False 

962 rootKeys = ("root",) 

963 validationCanFail = True 

964 

965 def setUp(self) -> None: 

966 # Override the working directory before calling the base class 

967 self.root = tempfile.mkdtemp(dir=TESTDIR) 

968 super().setUp() 

969 

970 def testAtomicWrite(self) -> None: 

971 """Test that we write to a temporary and then rename""" 

972 datastore = self.makeDatastore() 

973 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

974 dimensions = self.universe.extract(("visit", "physical_filter")) 

975 metrics = makeExampleMetrics() 

976 

977 dataId = dict({"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}) 

978 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId) 

979 

980 with self.assertLogs("lsst.resources", "DEBUG") as cm: 

981 datastore.put(metrics, ref) 

982 move_logs = [ll for ll in cm.output if "transfer=" in ll] 

983 self.assertIn("transfer=move", move_logs[0]) 

984 

985 # And the transfer should be file to file. 

986 self.assertEqual(move_logs[0].count("file://"), 2) 

987 

988 def testCanNotDeterminePutFormatterLocation(self) -> None: 

989 """Verify that the expected exception is raised if the FileDatastore 

990 can not determine the put formatter location. 

991 """ 

992 _ = makeExampleMetrics() 

993 datastore = self.makeDatastore() 

994 

995 # Create multiple storage classes for testing different formulations 

996 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

997 

998 sccomp = StorageClass("Dummy") 

999 compositeStorageClass = StorageClass( 

1000 "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp} 

1001 ) 

1002 

1003 dimensions = self.universe.extract(("visit", "physical_filter")) 

1004 dataId = dict({"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}) 

1005 

1006 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId) 

1007 compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId) 

1008 

1009 def raiser(ref: DatasetRef) -> None: 

1010 raise DatasetTypeNotSupportedError() 

1011 

1012 with unittest.mock.patch.object( 

1013 lsst.daf.butler.datastores.fileDatastore.FileDatastore, 

1014 "_determine_put_formatter_location", 

1015 side_effect=raiser, 

1016 ): 

1017 # verify the non-composite ref execution path: 

1018 with self.assertRaises(DatasetTypeNotSupportedError): 

1019 datastore.getURIs(ref, predict=True) 

1020 

1021 # verify the composite-ref execution path: 

1022 with self.assertRaises(DatasetTypeNotSupportedError): 

1023 datastore.getURIs(compRef, predict=True) 

1024 

1025 def test_roots(self): 

1026 datastore = self.makeDatastore() 

1027 

1028 self.assertEqual(set(datastore.names), set(datastore.roots.keys())) 

1029 for root in datastore.roots.values(): 

1030 if root is not None: 

1031 self.assertTrue(root.exists()) 

1032 

1033 

1034class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase): 

1035 """Posix datastore tests but with checksums disabled.""" 

1036 

1037 configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml") 

1038 

1039 def testChecksum(self) -> None: 

1040 """Ensure that checksums have not been calculated.""" 

1041 datastore = self.makeDatastore() 

1042 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1043 dimensions = self.universe.extract(("visit", "physical_filter")) 

1044 metrics = makeExampleMetrics() 

1045 

1046 dataId = dict({"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}) 

1047 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId) 

1048 

1049 # Configuration should have disabled checksum calculation 

1050 datastore.put(metrics, ref) 

1051 infos = datastore.getStoredItemsInfo(ref) 

1052 self.assertIsNone(infos[0].checksum) 

1053 

1054 # Remove put back but with checksums enabled explicitly 

1055 datastore.remove(ref) 

1056 datastore.useChecksum = True 

1057 datastore.put(metrics, ref) 

1058 

1059 infos = datastore.getStoredItemsInfo(ref) 

1060 self.assertIsNotNone(infos[0].checksum) 

1061 

1062 

1063class TrashDatastoreTestCase(PosixDatastoreTestCase): 

1064 """Restrict trash test to FileDatastore.""" 

1065 

1066 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1067 

1068 def testTrash(self) -> None: 

1069 datastore, *refs = self.prepDeleteTest(n_refs=10) 

1070 

1071 # Trash one of them. 

1072 ref = refs.pop() 

1073 uri = datastore.getURI(ref) 

1074 datastore.trash(ref) 

1075 self.assertTrue(uri.exists(), uri) # Not deleted yet 

1076 datastore.emptyTrash() 

1077 self.assertFalse(uri.exists(), uri) 

1078 

1079 # Trash it again should be fine. 

1080 datastore.trash(ref) 

1081 

1082 # Trash multiple items at once. 

1083 subset = [refs.pop(), refs.pop()] 

1084 datastore.trash(subset) 

1085 datastore.emptyTrash() 

1086 

1087 # Remove a record and trash should do nothing. 

1088 # This is execution butler scenario. 

1089 ref = refs.pop() 

1090 uri = datastore.getURI(ref) 

1091 datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

1092 self.assertTrue(uri.exists()) 

1093 datastore.trash(ref) 

1094 datastore.emptyTrash() 

1095 self.assertTrue(uri.exists()) 

1096 

1097 # Switch on trust and it should delete the file. 

1098 datastore.trustGetRequest = True 

1099 datastore.trash([ref]) 

1100 self.assertFalse(uri.exists()) 

1101 

1102 # Remove multiples at once in trust mode. 

1103 subset = [refs.pop() for i in range(3)] 

1104 datastore.trash(subset) 

1105 datastore.trash(refs.pop()) # Check that a single ref can trash 

1106 

1107 

1108class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase): 

1109 """Test datastore cleans up on failure.""" 

1110 

1111 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1112 

1113 def setUp(self) -> None: 

1114 # Override the working directory before calling the base class 

1115 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1116 super().setUp() 

1117 

1118 def testCleanup(self) -> None: 

1119 """Test that a failed formatter write does cleanup a partial file.""" 

1120 metrics = makeExampleMetrics() 

1121 datastore = self.makeDatastore() 

1122 

1123 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1124 

1125 dimensions = self.universe.extract(("visit", "physical_filter")) 

1126 dataId = dict({"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}) 

1127 

1128 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId) 

1129 

1130 # Determine where the file will end up (we assume Formatters use 

1131 # the same file extension) 

1132 expectedUri = datastore.getURI(ref, predict=True) 

1133 self.assertEqual(expectedUri.fragment, "predicted") 

1134 

1135 self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}") 

1136 

1137 # Try formatter that fails and formatter that fails and leaves 

1138 # a file behind 

1139 for formatter in (BadWriteFormatter, BadNoWriteFormatter): 

1140 with self.subTest(formatter=formatter): 

1141 # Monkey patch the formatter 

1142 datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True) 

1143 

1144 # Try to put the dataset, it should fail 

1145 with self.assertRaises(Exception): 

1146 datastore.put(metrics, ref) 

1147 

1148 # Check that there is no file on disk 

1149 self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}") 

1150 

1151 # Check that there is a directory 

1152 dir = expectedUri.dirname() 

1153 self.assertTrue(dir.exists(), f"Check for existence of directory {dir}") 

1154 

1155 # Force YamlFormatter and check that this time a file is written 

1156 datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True) 

1157 datastore.put(metrics, ref) 

1158 self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}") 

1159 datastore.remove(ref) 

1160 self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}") 

1161 

1162 

1163class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase): 

1164 """PosixDatastore specialization""" 

1165 

1166 configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml") 

1167 uriScheme = "mem" 

1168 hasUnsupportedPut = False 

1169 ingestTransferModes = () 

1170 isEphemeral = True 

1171 rootKeys = None 

1172 validationCanFail = False 

1173 

1174 

1175class ChainedDatastoreTestCase(PosixDatastoreTestCase): 

1176 """ChainedDatastore specialization using a POSIXDatastore""" 

1177 

1178 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml") 

1179 hasUnsupportedPut = False 

1180 canIngestNoTransferAuto = False 

1181 ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto") 

1182 isEphemeral = False 

1183 rootKeys = (".datastores.1.root", ".datastores.2.root") 

1184 validationCanFail = True 

1185 

1186 

1187class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase): 

1188 """ChainedDatastore specialization using all InMemoryDatastore""" 

1189 

1190 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml") 

1191 validationCanFail = False 

1192 

1193 

1194class DatastoreConstraintsTests(DatastoreTestsBase): 

1195 """Basic tests of constraints model of Datastores.""" 

1196 

1197 def testConstraints(self) -> None: 

1198 """Test constraints model. Assumes that each test class has the 

1199 same constraints. 

1200 """ 

1201 metrics = makeExampleMetrics() 

1202 datastore = self.makeDatastore() 

1203 

1204 sc1 = self.storageClassFactory.getStorageClass("StructuredData") 

1205 sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson") 

1206 dimensions = self.universe.extract(("visit", "physical_filter", "instrument")) 

1207 dataId = dict({"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}) 

1208 

1209 # Write empty file suitable for ingest check (JSON and YAML variants) 

1210 testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml") 

1211 testfile_j = tempfile.NamedTemporaryFile(suffix=".json") 

1212 for datasetTypeName, sc, accepted in ( 

1213 ("metric", sc1, True), 

1214 ("metric5", sc1, False), 

1215 ("metric33", sc1, True), 

1216 ("metric5", sc2, True), 

1217 ): 

1218 # Choose different temp file depending on StorageClass 

1219 testfile = testfile_j if sc.name.endswith("Json") else testfile_y 

1220 

1221 with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name): 

1222 ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId) 

1223 if accepted: 

1224 datastore.put(metrics, ref) 

1225 self.assertTrue(datastore.exists(ref)) 

1226 datastore.remove(ref) 

1227 

1228 # Try ingest 

1229 if self.canIngest: 

1230 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1231 self.assertTrue(datastore.exists(ref)) 

1232 datastore.remove(ref) 

1233 else: 

1234 with self.assertRaises(DatasetTypeNotSupportedError): 

1235 datastore.put(metrics, ref) 

1236 self.assertFalse(datastore.exists(ref)) 

1237 

1238 # Again with ingest 

1239 if self.canIngest: 

1240 with self.assertRaises(DatasetTypeNotSupportedError): 

1241 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1242 self.assertFalse(datastore.exists(ref)) 

1243 

1244 

1245class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase): 

1246 """PosixDatastore specialization""" 

1247 

1248 configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml") 

1249 canIngest = True 

1250 

1251 def setUp(self) -> None: 

1252 # Override the working directory before calling the base class 

1253 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1254 super().setUp() 

1255 

1256 

1257class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase): 

1258 """InMemoryDatastore specialization.""" 

1259 

1260 configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml") 

1261 canIngest = False 

1262 

1263 

1264class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase): 

1265 """ChainedDatastore specialization using a POSIXDatastore and constraints 

1266 at the ChainedDatstore. 

1267 """ 

1268 

1269 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml") 

1270 

1271 

1272class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase): 

1273 """ChainedDatastore specialization using a POSIXDatastore.""" 

1274 

1275 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml") 

1276 

1277 

1278class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase): 

1279 """ChainedDatastore specialization using all InMemoryDatastore.""" 

1280 

1281 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml") 

1282 canIngest = False 

1283 

1284 

1285class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase): 

1286 """Test that a chained datastore can control constraints per-datastore 

1287 even if child datastore would accept. 

1288 """ 

1289 

1290 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml") 

1291 

1292 def setUp(self) -> None: 

1293 # Override the working directory before calling the base class 

1294 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1295 super().setUp() 

1296 

1297 def testConstraints(self) -> None: 

1298 """Test chained datastore constraints model.""" 

1299 metrics = makeExampleMetrics() 

1300 datastore = self.makeDatastore() 

1301 

1302 sc1 = self.storageClassFactory.getStorageClass("StructuredData") 

1303 sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson") 

1304 dimensions = self.universe.extract(("visit", "physical_filter", "instrument")) 

1305 dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"} 

1306 dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"} 

1307 

1308 # Write empty file suitable for ingest check (JSON and YAML variants) 

1309 testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml") 

1310 testfile_j = tempfile.NamedTemporaryFile(suffix=".json") 

1311 

1312 for typeName, dataId, sc, accept, ingest in ( 

1313 ("metric", dataId1, sc1, (False, True, False), True), 

1314 ("metric5", dataId1, sc1, (False, False, False), False), 

1315 ("metric5", dataId2, sc1, (True, False, False), False), 

1316 ("metric33", dataId2, sc2, (True, True, False), True), 

1317 ("metric5", dataId1, sc2, (False, True, False), True), 

1318 ): 

1319 # Choose different temp file depending on StorageClass 

1320 testfile = testfile_j if sc.name.endswith("Json") else testfile_y 

1321 

1322 with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name): 

1323 ref = self.makeDatasetRef(typeName, dimensions, sc, dataId) 

1324 if any(accept): 

1325 datastore.put(metrics, ref) 

1326 self.assertTrue(datastore.exists(ref)) 

1327 

1328 # Check each datastore inside the chained datastore 

1329 for childDatastore, expected in zip(datastore.datastores, accept): 

1330 self.assertEqual( 

1331 childDatastore.exists(ref), 

1332 expected, 

1333 f"Testing presence of {ref} in datastore {childDatastore.name}", 

1334 ) 

1335 

1336 datastore.remove(ref) 

1337 

1338 # Check that ingest works 

1339 if ingest: 

1340 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1341 self.assertTrue(datastore.exists(ref)) 

1342 

1343 # Check each datastore inside the chained datastore 

1344 for childDatastore, expected in zip(datastore.datastores, accept): 

1345 # Ephemeral datastores means InMemory at the moment 

1346 # and that does not accept ingest of files. 

1347 if childDatastore.isEphemeral: 

1348 expected = False 

1349 self.assertEqual( 

1350 childDatastore.exists(ref), 

1351 expected, 

1352 f"Testing presence of ingested {ref} in datastore {childDatastore.name}", 

1353 ) 

1354 

1355 datastore.remove(ref) 

1356 else: 

1357 with self.assertRaises(DatasetTypeNotSupportedError): 

1358 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1359 

1360 else: 

1361 with self.assertRaises(DatasetTypeNotSupportedError): 

1362 datastore.put(metrics, ref) 

1363 self.assertFalse(datastore.exists(ref)) 

1364 

1365 # Again with ingest 

1366 with self.assertRaises(DatasetTypeNotSupportedError): 

1367 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1368 self.assertFalse(datastore.exists(ref)) 

1369 

1370 

1371class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase): 

1372 """Tests for datastore caching infrastructure.""" 

1373 

1374 @classmethod 

1375 def setUpClass(cls) -> None: 

1376 cls.storageClassFactory = StorageClassFactory() 

1377 cls.universe = DimensionUniverse() 

1378 

1379 # Ensure that we load the test storage class definitions. 

1380 scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml") 

1381 cls.storageClassFactory.addFromConfig(scConfigFile) 

1382 

1383 def setUp(self) -> None: 

1384 self.id = 0 

1385 

1386 # Create a root that we can use for caching tests. 

1387 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1388 

1389 # Create some test dataset refs and associated test files 

1390 sc = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1391 dimensions = self.universe.extract(("visit", "physical_filter")) 

1392 dataId = dict({"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}) 

1393 

1394 # Create list of refs and list of temporary files 

1395 n_datasets = 10 

1396 self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)] 

1397 

1398 root_uri = ResourcePath(self.root, forceDirectory=True) 

1399 self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)] 

1400 

1401 # Create test files. 

1402 for uri in self.files: 

1403 uri.write(b"0123456789") 

1404 

1405 # Create some composite refs with component files. 

1406 sc = self.storageClassFactory.getStorageClass("StructuredData") 

1407 self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)] 

1408 self.comp_files = [] 

1409 self.comp_refs = [] 

1410 for n, ref in enumerate(self.composite_refs): 

1411 component_refs = [] 

1412 component_files = [] 

1413 for component in sc.components: 

1414 component_ref = ref.makeComponentRef(component) 

1415 file = root_uri.join(f"composite_file-{n}-{component}.txt") 

1416 component_refs.append(component_ref) 

1417 component_files.append(file) 

1418 file.write(b"9876543210") 

1419 

1420 self.comp_files.append(component_files) 

1421 self.comp_refs.append(component_refs) 

1422 
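# Editor's note: the fixture above provides ten 10-byte files with
# matching refs (self.files / self.refs) plus three composite refs whose
# per-component refs and files live in self.comp_refs / self.comp_files;
# the expiry tests below lean on exactly these counts and sizes.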

1423 def tearDown(self) -> None: 

1424 if self.root is not None and os.path.exists(self.root): 

1425 shutil.rmtree(self.root, ignore_errors=True) 

1426 

1427 def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager: 

1428 config = Config.fromYaml(config_str) 

1429 return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe) 

1430 

1431 def testNoCacheDir(self) -> None: 

1432 config_str = """ 

1433cached: 

1434 root: null 

1435 cacheable: 

1436 metric0: true 

1437 """ 

1438 cache_manager = self._make_cache_manager(config_str) 

1439 

1440 # Look inside to check we don't have a cache directory 

1441 self.assertIsNone(cache_manager._cache_directory) 

1442 

1443 self.assertCache(cache_manager) 

1444 

1445 # Test that the cache directory is marked temporary 

1446 self.assertTrue(cache_manager.cache_directory.isTemporary) 

1447 

1448 def testNoCacheDirReversed(self) -> None: 

1449 """Use default caching status and metric1 to false""" 

1450 config_str = """ 

1451cached: 

1452 root: null 

1453 default: true 

1454 cacheable: 

1455 metric1: false 

1456 """ 

1457 cache_manager = self._make_cache_manager(config_str) 

1458 

1459 self.assertCache(cache_manager) 

1460 

1461 def testEnvvarCacheDir(self) -> None: 

1462 config_str = f""" 

1463cached: 

1464 root: '{self.root}' 

1465 cacheable: 

1466 metric0: true 

1467 """ 

1468 

1469 root = ResourcePath(self.root, forceDirectory=True) 

1470 env_dir = root.join("somewhere", forceDirectory=True) 

1471 elsewhere = root.join("elsewhere", forceDirectory=True) 

1472 

1473 # Environment variable should override the config value. 

1474 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}): 

1475 cache_manager = self._make_cache_manager(config_str) 

1476 self.assertEqual(cache_manager.cache_directory, env_dir) 

1477 

1478 # This environment variable should not override the config value. 

1479 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1480 cache_manager = self._make_cache_manager(config_str) 

1481 self.assertEqual(cache_manager.cache_directory, root) 

1482 

1483 # Now use a config with no explicit cache root set.

1484 config_str = """ 

1485cached: 

1486 root: null 

1487 default: true 

1488 cacheable: 

1489 metric1: false 

1490 """ 

1491 cache_manager = self._make_cache_manager(config_str) 

1492 

1493 # This environment variable should override the config value. 

1494 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1495 cache_manager = self._make_cache_manager(config_str) 

1496 self.assertEqual(cache_manager.cache_directory, env_dir) 

1497 

1498 # If both environment variables are set the main (not IF_UNSET) 

1499 # variable should win. 

1500 with unittest.mock.patch.dict( 

1501 os.environ, 

1502 { 

1503 "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath, 

1504 "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath, 

1505 }, 

1506 ): 

1507 cache_manager = self._make_cache_manager(config_str) 

1508 self.assertEqual(cache_manager.cache_directory, env_dir) 

1509 

1510 # Use the API to set the environment variable, making sure that the 

1511 # variable is reset on exit. 

1512 with unittest.mock.patch.dict( 

1513 os.environ, 

1514 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1515 ): 

1516 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1517 self.assertTrue(defined) 

1518 cache_manager = self._make_cache_manager(config_str) 

1519 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1520 

1521 # Now create the cache manager ahead of time and set the fallback 

1522 # later. 

1523 cache_manager = self._make_cache_manager(config_str) 

1524 self.assertIsNone(cache_manager._cache_directory) 

1525 with unittest.mock.patch.dict( 

1526 os.environ, 

1527 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1528 ): 

1529 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1530 self.assertTrue(defined) 

1531 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1532 
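# Editor's note: a sketch of the precedence exercised above (observed
# behavior of these tests, not normative documentation):
#
#     DAF_BUTLER_CACHE_DIRECTORY             # overrides everything
#     > cached.root from the config          # if the override is unset
#     > DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET  # only when no root configured
#     > a temporary directory                # last resort (isTemporary)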

1533 def testExplicitCacheDir(self) -> None: 

1534 config_str = f""" 

1535cached: 

1536 root: '{self.root}' 

1537 cacheable: 

1538 metric0: true 

1539 """ 

1540 cache_manager = self._make_cache_manager(config_str) 

1541 

1542 # Look inside to check we do have a cache directory. 

1543 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True)) 

1544 

1545 self.assertCache(cache_manager) 

1546 

1547 # Test that the cache directory is not marked temporary 

1548 self.assertFalse(cache_manager.cache_directory.isTemporary) 

1549 

1550 def assertCache(self, cache_manager: DatastoreCacheManager) -> None: 

1551 self.assertTrue(cache_manager.should_be_cached(self.refs[0])) 

1552 self.assertFalse(cache_manager.should_be_cached(self.refs[1])) 

1553 

1554 uri = cache_manager.move_to_cache(self.files[0], self.refs[0]) 

1555 self.assertIsInstance(uri, ResourcePath) 

1556 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1])) 

1557 

1558 # Check presence in cache using ref and then using file extension. 

1559 self.assertFalse(cache_manager.known_to_cache(self.refs[1])) 

1560 self.assertTrue(cache_manager.known_to_cache(self.refs[0])) 

1561 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension())) 

1562 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension())) 

1563 

1564 # Cached file should no longer exist but uncached file should be 

1565 # unaffected. 

1566 self.assertFalse(self.files[0].exists()) 

1567 self.assertTrue(self.files[1].exists()) 

1568 

1569 # Should find this file and it should be within the cache directory. 

1570 with cache_manager.find_in_cache(self.refs[0], ".txt") as found: 

1571 self.assertTrue(found.exists()) 

1572 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory)) 

1573 

1574 # Should not be able to find these in cache 

1575 with cache_manager.find_in_cache(self.refs[0], ".fits") as found: 

1576 self.assertIsNone(found) 

1577 with cache_manager.find_in_cache(self.refs[1], ".fits") as found: 

1578 self.assertIsNone(found) 

1579 

1580 def testNoCache(self) -> None: 

1581 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe) 

1582 for uri, ref in zip(self.files, self.refs): 

1583 self.assertFalse(cache_manager.should_be_cached(ref)) 

1584 self.assertIsNone(cache_manager.move_to_cache(uri, ref)) 

1585 self.assertFalse(cache_manager.known_to_cache(ref)) 

1586 with cache_manager.find_in_cache(ref, ".txt") as found: 

1587 self.assertIsNone(found, msg=f"{cache_manager}") 

1588 
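# Editor's note: DatastoreDisabledCacheManager is a no-op drop-in, so
# calling code can follow the same protocol unconditionally; a sketch,
# where "original_uri" is a hypothetical fallback:
#
#     with cache_manager.find_in_cache(ref, ".txt") as cached:
#         uri = cached if cached is not None else original_uri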

1589 def _expiration_config(self, mode: str, threshold: int) -> str: 

1590 return f""" 

1591cached: 

1592 default: true 

1593 expiry: 

1594 mode: {mode} 

1595 threshold: {threshold} 

1596 cacheable: 

1597 unused: true 

1598 """ 

1599 
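# For reference, _expiration_config("files", 2) renders to the following
# YAML (shown here purely as illustration):
#
#     cached:
#       default: true
#       expiry:
#         mode: files
#         threshold: 2
#       cacheable:
#         unused: true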

1600 def testCacheExpiryFiles(self) -> None: 

1601 threshold = 2 # Keep at least 2 files. 

1602 mode = "files" 

1603 config_str = self._expiration_config(mode, threshold) 

1604 

1605 cache_manager = self._make_cache_manager(config_str) 

1606 

1607 # Check that an empty cache reports any ref as unknown.

1608 self.assertFalse(cache_manager.known_to_cache(self.refs[0])) 

1609 

1610 # Should end with datasets: 2, 3, 4 

1611 self.assertExpiration(cache_manager, 5, threshold + 1) 

1612 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1613 

1614 # Check that we will not expire a file that is actively in use. 

1615 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1616 self.assertIsNotNone(found) 

1617 

1618 # Trigger cache expiration that should remove the file 

1619 # we just retrieved. Should now have: 3, 4, 5 

1620 cached = cache_manager.move_to_cache(self.files[5], self.refs[5]) 

1621 self.assertIsNotNone(cached) 

1622 

1623 # Cache should still report the expected file count (threshold + 1).

1624 self.assertEqual(cache_manager.file_count, threshold + 1) 

1625 

1626 # Add additional entry to cache. 

1627 # Should now have 4, 5, 6 

1628 cached = cache_manager.move_to_cache(self.files[6], self.refs[6]) 

1629 self.assertIsNotNone(cached) 

1630 

1631 # Is the file still there? 

1632 self.assertTrue(found.exists()) 

1633 

1634 # Can we read it? 

1635 data = found.read() 

1636 self.assertGreater(len(data), 0) 

1637 

1638 # Outside context the file should no longer exist. 

1639 self.assertFalse(found.exists()) 

1640 

1641 # File count should not have changed. 

1642 self.assertEqual(cache_manager.file_count, threshold + 1) 

1643 

1644 # Dataset 2 was in the exempt directory, but because hardlinks

1645 # are used it was deleted from the main cache during the expiry

1646 # above, so it should no longer be found.

1647 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1648 self.assertIsNone(found) 

1649 

1650 # And the one stored after it is also gone. 

1651 with cache_manager.find_in_cache(self.refs[3], ".txt") as found: 

1652 self.assertIsNone(found) 

1653 

1654 # But dataset 4 is present. 

1655 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1656 self.assertIsNotNone(found) 

1657 

1658 # Adding a new dataset to the cache should now expire the oldest entry.

1659 cache_manager.move_to_cache(self.files[7], self.refs[7]) 

1660 

1661 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1662 self.assertIsNone(found) 

1663 
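# Editor's note: find_in_cache is a context manager precisely because of
# the behavior exercised above: the returned URI is only guaranteed to
# exist inside the "with" block. A sketch of the safe pattern:
#
#     with cache_manager.find_in_cache(ref, ".txt") as found:
#         if found is not None:
#             data = found.read()
#     # outside the block, "found" may already have been expired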

1664 def testCacheExpiryDatasets(self) -> None: 

1665 threshold = 2 # Keep 2 datasets. 

1666 mode = "datasets" 

1667 config_str = self._expiration_config(mode, threshold) 

1668 

1669 cache_manager = self._make_cache_manager(config_str) 

1670 self.assertExpiration(cache_manager, 5, threshold + 1) 

1671 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1672 

1673 def testCacheExpiryDatasetsComposite(self) -> None: 

1674 threshold = 2 # Keep 2 datasets. 

1675 mode = "datasets" 

1676 config_str = self._expiration_config(mode, threshold) 

1677 

1678 cache_manager = self._make_cache_manager(config_str) 

1679 

1680 n_datasets = 3 

1681 for i in range(n_datasets): 

1682 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]): 

1683 cached = cache_manager.move_to_cache(component_file, component_ref) 

1684 self.assertIsNotNone(cached) 

1685 self.assertTrue(cache_manager.known_to_cache(component_ref)) 

1686 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef())) 

1687 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension())) 

1688 

1689 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files 

1690 

1691 # Write two new non-composite datasets and the number of files should drop.

1692 self.assertExpiration(cache_manager, 2, 5) 

1693 

1694 def testCacheExpirySize(self) -> None: 

1695 threshold = 55 # Each file is 10 bytes 

1696 mode = "size" 

1697 config_str = self._expiration_config(mode, threshold) 

1698 

1699 cache_manager = self._make_cache_manager(config_str) 

1700 self.assertExpiration(cache_manager, 10, 6) 

1701 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1702 
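# Editor's note on the arithmetic: each test file is 10 bytes, so after
# ten inserts a 55-byte threshold leaves 6 files (60 bytes). That count
# suggests expiry runs before each insert rather than after it; if it ran
# afterwards, only 5 files (50 bytes) would remain.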

1703 def assertExpiration( 

1704 self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int 

1705 ) -> None: 

1706 """Insert the datasets and then check the number retained.""" 

1707 for i in range(n_datasets): 

1708 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1709 self.assertIsNotNone(cached) 

1710 

1711 self.assertEqual(cache_manager.file_count, n_retained) 

1712 

1713 # The oldest files should no longer be in the cache.

1714 for i in range(n_datasets): 

1715 with cache_manager.find_in_cache(self.refs[i], ".txt") as found: 

1716 if i >= n_datasets - n_retained: 

1717 self.assertIsInstance(found, ResourcePath) 

1718 else: 

1719 self.assertIsNone(found) 

1720 

1721 def testCacheExpiryAge(self) -> None: 

1722 threshold = 1 # Expire files older than 1 second.

1723 mode = "age" 

1724 config_str = self._expiration_config(mode, threshold) 

1725 

1726 cache_manager = self._make_cache_manager(config_str) 

1727 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1728 

1729 # Insert 2 files, then sleep, then insert 4 more.

1730 for i in range(2): 

1731 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1732 self.assertIsNotNone(cached) 

1733 time.sleep(2.0) 

1734 for j in range(4): 

1735 i = 2 + j # Continue the counting 

1736 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1737 self.assertIsNotNone(cached) 

1738 

1739 # Only the files written after the sleep should exist. 

1740 self.assertEqual(cache_manager.file_count, 4) 

1741 with cache_manager.find_in_cache(self.refs[1], ".txt") as found: 

1742 self.assertIsNone(found) 

1743 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1744 self.assertIsInstance(found, ResourcePath) 

1745 

1746 

1747class DatasetRefURIsTestCase(unittest.TestCase): 

1748 """Tests for DatasetRefURIs.""" 

1749 

1750 def testSequenceAccess(self) -> None: 

1751 """Verify that DatasetRefURIs can be treated like a two-item tuple.""" 

1752 uris = DatasetRefURIs() 

1753 

1754 self.assertEqual(len(uris), 2) 

1755 self.assertEqual(uris[0], None) 

1756 self.assertEqual(uris[1], {}) 

1757 

1758 primaryURI = ResourcePath("1/2/3") 

1759 componentURI = ResourcePath("a/b/c") 

1760 

1761 # Affirm that DatasetRefURIs does not support MutableSequence functions.

1762 with self.assertRaises(TypeError): 

1763 uris[0] = primaryURI 

1764 with self.assertRaises(TypeError): 

1765 uris[1] = {"foo": componentURI} 

1766 

1767 # The URIs can, however, be set by property name:

1768 uris.primaryURI = primaryURI 

1769 uris.componentURIs = {"foo": componentURI} 

1770 self.assertEqual(uris.primaryURI, primaryURI) 

1771 self.assertEqual(uris[0], primaryURI) 

1772 

1773 primary, components = uris 

1774 self.assertEqual(primary, primaryURI) 

1775 self.assertEqual(components, {"foo": componentURI}) 

1776 
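# Editor's note: the read-only tuple behavior verified above keeps legacy
# callers working if they unpack a (primary, components) pair, e.g. from
# a datastore method returning DatasetRefURIs (hypothetical usage):
#
#     primary, components = datastore.getURIs(ref)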

1777 def testRepr(self) -> None: 

1778 """Verify __repr__ output.""" 

1779 uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")}) 

1780 self.assertEqual( 

1781 repr(uris), 

1782 'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})', 

1783 ) 

1784 

1785 

1786class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase): 

1787 """Test the StoredFileInfo class.""" 

1788 

1789 storageClassFactory = StorageClassFactory() 

1790 

1791 def test_StoredFileInfo(self) -> None: 

1792 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1793 ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}) 

1794 

1795 record = dict( 

1796 storage_class="StructuredDataDict", 

1797 formatter="lsst.daf.butler.Formatter", 

1798 path="a/b/c.txt", 

1799 component="component", 

1800 dataset_id=ref.id, 

1801 checksum=None, 

1802 file_size=5, 

1803 ) 

1804 info = StoredFileInfo.from_record(record) 

1805 

1806 self.assertEqual(info.dataset_id, ref.id) 

1807 self.assertEqual(info.to_record(), record) 

1808 

1809 ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}) 

1810 rebased = info.rebase(ref2) 

1811 self.assertEqual(rebased.dataset_id, ref2.id) 

1812 self.assertEqual(rebased.rebase(ref), info) 

1813 

1814 with self.assertRaises(TypeError): 

1815 rebased.update(formatter=42) 

1816 

1817 with self.assertRaises(ValueError): 

1818 rebased.update(something=42, new="42") 

1819 

1820 # Check that pickle works on StoredFileInfo. 

1821 pickled_info = pickle.dumps(info) 

1822 unpickled_info = pickle.loads(pickled_info) 

1823 self.assertEqual(unpickled_info, info) 

1824 
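# Editor's note: the to_record/from_record symmetry checked above implies
# a round-trip property; a sketch:
#
#     assert StoredFileInfo.from_record(info.to_record()) == info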

1825 

1826if __name__ == "__main__": 

1827 unittest.main()