Coverage for tests/test_datastore.py: 11% of 1009 statements (coverage.py v7.2.7, created at 2023-06-28 10:10 +0000)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable
from typing import Any, cast

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    StorageClass,
    StorageClassFactory,
    StoredFileInfo,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    """Make example dataset that can be stored in butler."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
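
# For reference: the tests below read this object back through three
# attributes (metrics.summary, metrics.output, metrics.data), which appear
# to correspond to the three positional arguments above in that order, e.g.
#
#     metrics.summary == {"AM1": 5.2, "AM2": 30.6}
#     metrics.data == [563, 234, 456.7, 105, 2054, -1045]  # or None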

class TransactionTestError(Exception):
    """Specific error for transactions, used to prevent the misdiagnosis
    that might otherwise occur when a standard exception is raised.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing"""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False

    def testConfigRoot(self) -> None:
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self) -> None:
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self) -> None:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])
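            # Loosely: knows()/knows_these() ask whether the datastore has a
            # record of the ref, while exists()/mexists() also check the
            # underlying artifact; ref2 was never put, so it is False either
            # way.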

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with the parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

            storageClass = sc

            # Check that we can put a metric with None in a component and
            # get it back as None
            metricsNone = makeExampleMetrics(use_none=True)
            dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V", "band": "v"}
            refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
            datastore.put(metricsNone, refNone)

            for comp in ("data", "output"):
                compRef = refNone.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsNone, comp))

            # Check that a put fails if the dataset type is not supported
            if self.hasUnsupportedPut:
                sc = StorageClass("UnsupportedSC", pytype=type(metrics))
                ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
                with self.assertRaises(DatasetTypeNotSupportedError):
                    datastore.put(metrics, ref)

            # These should raise
            ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)

            # Get a URI from it
            uri = datastore.getURI(ref, predict=True)
            self.assertEqual(uri.scheme, self.uriScheme)

            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that registry knows nothing about."""
        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(
            ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"), start=1
        ):
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            disassembled = sc_name == "StructuredComposite"

            # Start datastore in the default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the datastore record so that only the file artifact
            # remains.
            datastore.removeStoredItemInfo(ref)

            # With trust still disabled, check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")
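            # The "#predicted" URI fragment is the convention used throughout
            # these tests: it marks a URI computed from the file template
            # (predict=True) rather than one derived from stored records.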

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust, this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for a compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make a new dataset ref with a compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with the original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self) -> None:
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble, to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of tests because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with a read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore | DatasetRef, ...]:
        """Put example datasets and return the datastore followed by the
        new refs.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
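
    # The deletion tests below exercise three flavours of removal: remove()
    # deletes the artifact and the record, forget() drops only the record
    # (the file survives, as testForget checks via the predicted URI), and
    # trash()/emptyTrash() (in TrashDatastoreTestCase) splits deletion into
    # a two-phase mark-then-sweep.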

    def testRemove(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # The predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self) -> None:
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"}
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V", "band": "v"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V", "band": "v"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(
        self, func: Callable[[MetricsExample, str, DatasetRef], None], expectOutput: bool = True
    ) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have "auto" but can't do an in-place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files outside of the datastore root unless
                    the mode is auto.
                    """
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Ingest a file by transferring it to the template
                    location.
                    """
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # the datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop
                    # since it will get the same file name in the store
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only a FileDatastore can be used for this test.
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for datastore_name in datastore.names:
            if not datastore_name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs
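
    # export_records()/import_records() move only the datastore's bookkeeping
    # (records pointing at existing artifacts), not the files themselves.
    # That is why the importing datastore below is created with the same name:
    # relative paths in the records then resolve to the same files.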

    def testExportImportRecords(self) -> None:
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works; include a non-existing
                # dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import relative paths.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict
        on read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)
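        # overrideStorageClass() does not change what is stored; it only
        # changes the Python type the datastore converts to on read, which is
        # why the same artifact can come back as either the model or a dict.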

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with a dict()
        constructor.
        """
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))
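        # The "A"/"B" variants of each storage class are presumably
        # configured with different serialization formats; the test only
        # requires that both round-trip to equal objects.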

class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self) -> None:
        """Test that we write to a temporary and then rename"""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)
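        # Two file:// URIs in a single transfer=move log line are what a
        # write-to-temporary-then-rename implementation should produce: the
        # temporary file is the source and the final location the target.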

    def testCanNotDeterminePutFormatterLocation(self) -> None:
        """Verify that the expected exception is raised if the FileDatastore
        can not determine the put formatter location.
        """
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId)

        def raiser(ref: DatasetRef) -> None:
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self) -> None:
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove it, then put it back with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash tests to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self) -> None:
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # If we remove a record, trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())
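        # With trustGetRequest enabled the datastore is willing to act on
        # artifacts it has no record of, so this time the trash cycle
        # deletes the orphaned file.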

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Test that the datastore cleans up on failure."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self) -> None:
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try formatters that fail in different ways: one that fails without
        # writing and one that leaves a partial file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False

1184 

1185 

1186class DatastoreConstraintsTests(DatastoreTestsBase): 

1187 """Basic tests of constraints model of Datastores.""" 

1188 

1189 def testConstraints(self) -> None: 

1190 """Test constraints model. Assumes that each test class has the 

1191 same constraints. 

1192 """ 

1193 metrics = makeExampleMetrics() 

1194 datastore = self.makeDatastore() 

1195 

1196 sc1 = self.storageClassFactory.getStorageClass("StructuredData") 

1197 sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson") 

1198 dimensions = self.universe.extract(("visit", "physical_filter", "instrument")) 

1199 dataId = dict({"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}) 

1200 

1201 # Write empty file suitable for ingest check (JSON and YAML variants) 

1202 testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml") 

1203 testfile_j = tempfile.NamedTemporaryFile(suffix=".json") 

1204 for datasetTypeName, sc, accepted in ( 

1205 ("metric", sc1, True), 

1206 ("metric5", sc1, False), 

1207 ("metric33", sc1, True), 

1208 ("metric5", sc2, True), 

1209 ): 

1210 # Choose different temp file depending on StorageClass 

1211 testfile = testfile_j if sc.name.endswith("Json") else testfile_y 

1212 

1213 with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name): 

1214 ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId) 

1215 if accepted: 

1216 datastore.put(metrics, ref) 

1217 self.assertTrue(datastore.exists(ref)) 

1218 datastore.remove(ref) 

1219 

1220 # Try ingest 

1221 if self.canIngest: 

1222 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1223 self.assertTrue(datastore.exists(ref)) 

1224 datastore.remove(ref) 

1225 else: 

1226 with self.assertRaises(DatasetTypeNotSupportedError): 

1227 datastore.put(metrics, ref) 

1228 self.assertFalse(datastore.exists(ref)) 

1229 

1230 # Again with ingest 

1231 if self.canIngest: 

1232 with self.assertRaises(DatasetTypeNotSupportedError): 

1233 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1234 self.assertFalse(datastore.exists(ref)) 

1235 

1236 

1237class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase): 

1238 """PosixDatastore specialization""" 

1239 

1240 configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml") 

1241 canIngest = True 

1242 

1243 def setUp(self) -> None: 

1244 # Override the working directory before calling the base class 

1245 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1246 super().setUp() 

1247 

1248 

1249class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase): 

1250 """InMemoryDatastore specialization.""" 

1251 

1252 configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml") 

1253 canIngest = False 

1254 

1255 

1256class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase): 

1257 """ChainedDatastore specialization using a POSIXDatastore and constraints 

1258 at the ChainedDatstore. 

1259 """ 

1260 

1261 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml") 

1262 

1263 

1264class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase): 

1265 """ChainedDatastore specialization using a POSIXDatastore.""" 

1266 

1267 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml") 

1268 

1269 

1270class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase): 

1271 """ChainedDatastore specialization using all InMemoryDatastore.""" 

1272 

1273 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml") 

1274 canIngest = False 

1275 

1276 

1277class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase): 

1278 """Test that a chained datastore can control constraints per-datastore 

1279 even if child datastore would accept. 

1280 """ 

1281 

1282 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml") 

1283 

1284 def setUp(self) -> None: 

1285 # Override the working directory before calling the base class 

1286 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1287 super().setUp() 

1288 

1289 def testConstraints(self) -> None: 

1290 """Test chained datastore constraints model.""" 

1291 metrics = makeExampleMetrics() 

1292 datastore = self.makeDatastore() 

1293 

1294 sc1 = self.storageClassFactory.getStorageClass("StructuredData") 

1295 sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson") 

1296 dimensions = self.universe.extract(("visit", "physical_filter", "instrument")) 

1297 dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"} 

1298 dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"} 

1299 

1300 # Write empty file suitable for ingest check (JSON and YAML variants) 

1301 testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml") 

1302 testfile_j = tempfile.NamedTemporaryFile(suffix=".json") 

1303 

1304 for typeName, dataId, sc, accept, ingest in ( 

1305 ("metric", dataId1, sc1, (False, True, False), True), 

1306 ("metric5", dataId1, sc1, (False, False, False), False), 

1307 ("metric5", dataId2, sc1, (True, False, False), False), 

1308 ("metric33", dataId2, sc2, (True, True, False), True), 

1309 ("metric5", dataId1, sc2, (False, True, False), True), 

1310 ): 

1311 # Choose different temp file depending on StorageClass 

1312 testfile = testfile_j if sc.name.endswith("Json") else testfile_y 

1313 

1314 with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name): 

1315 ref = self.makeDatasetRef(typeName, dimensions, sc, dataId) 

1316 if any(accept): 

1317 datastore.put(metrics, ref) 

1318 self.assertTrue(datastore.exists(ref)) 

1319 

1320 # Check each datastore inside the chained datastore 

1321 for childDatastore, expected in zip(datastore.datastores, accept): 

1322 self.assertEqual( 

1323 childDatastore.exists(ref), 

1324 expected, 

1325 f"Testing presence of {ref} in datastore {childDatastore.name}", 

1326 ) 

1327 

1328 datastore.remove(ref) 

1329 

1330 # Check that ingest works 

1331 if ingest: 

1332 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1333 self.assertTrue(datastore.exists(ref)) 

1334 

1335 # Check each datastore inside the chained datastore 

1336 for childDatastore, expected in zip(datastore.datastores, accept): 

1337 # Ephemeral datastores means InMemory at the moment 

1338 # and that does not accept ingest of files. 

1339 if childDatastore.isEphemeral: 

1340 expected = False 

1341 self.assertEqual( 

1342 childDatastore.exists(ref), 

1343 expected, 

1344 f"Testing presence of ingested {ref} in datastore {childDatastore.name}", 

1345 ) 

1346 

1347 datastore.remove(ref) 

1348 else: 

1349 with self.assertRaises(DatasetTypeNotSupportedError): 

1350 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1351 

1352 else: 

1353 with self.assertRaises(DatasetTypeNotSupportedError): 

1354 datastore.put(metrics, ref) 

1355 self.assertFalse(datastore.exists(ref)) 

1356 

1357 # Again with ingest 

1358 with self.assertRaises(DatasetTypeNotSupportedError): 

1359 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1360 self.assertFalse(datastore.exists(ref)) 

1361 

1362 

1363class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase): 

1364 """Tests for datastore caching infrastructure.""" 

1365 

1366 @classmethod 

1367 def setUpClass(cls) -> None: 

1368 cls.storageClassFactory = StorageClassFactory() 

1369 cls.universe = DimensionUniverse() 

1370 

1371 # Ensure that we load the test storage class definitions. 

1372 scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml") 

1373 cls.storageClassFactory.addFromConfig(scConfigFile) 

1374 

1375 def setUp(self) -> None: 

1376 self.id = 0 

1377 

1378 # Create a root that we can use for caching tests. 

1379 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1380 

1381 # Create some test dataset refs and associated test files 

1382 sc = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1383 dimensions = self.universe.extract(("visit", "physical_filter")) 

1384 dataId = dict({"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}) 

1385 

1386 # Create list of refs and list of temporary files 

1387 n_datasets = 10 

1388 self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)] 

1389 

1390 root_uri = ResourcePath(self.root, forceDirectory=True) 

1391 self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)] 

1392 

1393 # Create test files. 

1394 for uri in self.files: 

1395 uri.write(b"0123456789") 

1396 

1397 # Create some composite refs with component files. 

1398 sc = self.storageClassFactory.getStorageClass("StructuredData") 

1399 self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)] 

1400 self.comp_files = [] 

1401 self.comp_refs = [] 

1402 for n, ref in enumerate(self.composite_refs): 

1403 component_refs = [] 

1404 component_files = [] 

1405 for component in sc.components: 

1406 component_ref = ref.makeComponentRef(component) 

1407 file = root_uri.join(f"composite_file-{n}-{component}.txt") 

1408 component_refs.append(component_ref) 

1409 component_files.append(file) 

1410 file.write(b"9876543210") 

1411 

1412 self.comp_files.append(component_files) 

1413 self.comp_refs.append(component_refs) 

1414 

1415 def tearDown(self) -> None: 

1416 if self.root is not None and os.path.exists(self.root): 

1417 shutil.rmtree(self.root, ignore_errors=True) 

1418 

1419 def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager: 

1420 config = Config.fromYaml(config_str) 

1421 return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe) 

1422 
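    # For reference, a minimal configuration accepted by _make_cache_manager
    # follows the layout used throughout the tests below (a sketch; only the
    # "cached" section is shown, and the inline comments are assumptions
    # based on the assertions in this test case):
    #
    #     cached:
    #       root: null          # null -> a temporary directory on first use
    #       default: false      # cache nothing unless listed under cacheable
    #       cacheable:
    #         metric0: true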

1423 def testNoCacheDir(self) -> None: 

1424 config_str = """ 

1425cached: 

1426 root: null 

1427 cacheable: 

1428 metric0: true 

1429 """ 

1430 cache_manager = self._make_cache_manager(config_str) 

1431 

1432 # Look inside to check we don't have a cache directory 

1433 self.assertIsNone(cache_manager._cache_directory) 

1434 

1435 self.assertCache(cache_manager) 

1436 

1437 # Test that the cache directory is marked temporary 

1438 self.assertTrue(cache_manager.cache_directory.isTemporary) 

1439 

1440 def testNoCacheDirReversed(self) -> None: 

1441 """Use the default caching status and set metric1 to false.""" 

1442 config_str = """ 

1443cached: 

1444 root: null 

1445 default: true 

1446 cacheable: 

1447 metric1: false 

1448 """ 

1449 cache_manager = self._make_cache_manager(config_str) 

1450 

1451 self.assertCache(cache_manager) 

1452 

1453 def testEnvvarCacheDir(self) -> None: 

1454 config_str = f""" 

1455cached: 

1456 root: '{self.root}' 

1457 cacheable: 

1458 metric0: true 

1459 """ 

1460 

1461 root = ResourcePath(self.root, forceDirectory=True) 

1462 env_dir = root.join("somewhere", forceDirectory=True) 

1463 elsewhere = root.join("elsewhere", forceDirectory=True) 

1464 

1465 # Environment variable should override the config value. 

1466 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}): 

1467 cache_manager = self._make_cache_manager(config_str) 

1468 self.assertEqual(cache_manager.cache_directory, env_dir) 

1469 

1470 # This environment variable should not override the config value. 

1471 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1472 cache_manager = self._make_cache_manager(config_str) 

1473 self.assertEqual(cache_manager.cache_directory, root) 

1474 

1475 # Now a config with no explicit cache root. 

1476 config_str = """ 

1477cached: 

1478 root: null 

1479 default: true 

1480 cacheable: 

1481 metric1: false 

1482 """ 

1483 cache_manager = self._make_cache_manager(config_str) 

1484 

1485 # This environment variable should override the config value. 

1486 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1487 cache_manager = self._make_cache_manager(config_str) 

1488 self.assertEqual(cache_manager.cache_directory, env_dir) 

1489 

1490 # If both environment variables are set the main (not IF_UNSET) 

1491 # variable should win. 

1492 with unittest.mock.patch.dict( 

1493 os.environ, 

1494 { 

1495 "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath, 

1496 "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath, 

1497 }, 

1498 ): 

1499 cache_manager = self._make_cache_manager(config_str) 

1500 self.assertEqual(cache_manager.cache_directory, env_dir) 

1501 

1502 # Use the API to set the environment variable, making sure that the 

1503 # variable is reset on exit. 

1504 with unittest.mock.patch.dict( 

1505 os.environ, 

1506 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1507 ): 

1508 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1509 self.assertTrue(defined) 

1510 cache_manager = self._make_cache_manager(config_str) 

1511 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1512 

1513 # Now create the cache manager ahead of time and set the fallback 

1514 # later. 

1515 cache_manager = self._make_cache_manager(config_str) 

1516 self.assertIsNone(cache_manager._cache_directory) 

1517 with unittest.mock.patch.dict( 

1518 os.environ, 

1519 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1520 ): 

1521 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1522 self.assertTrue(defined) 

1523 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1524 
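    # Summarising the precedence asserted above, highest priority first:
    #   1. DAF_BUTLER_CACHE_DIRECTORY (always overrides the config)
    #   2. an explicit "root" in the config
    #   3. DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET, or the equivalent
    #      set_fallback_cache_directory_if_unset() call
    #   4. a temporary directory created on demand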

1525 def testExplicitCacheDir(self) -> None: 

1526 config_str = f""" 

1527cached: 

1528 root: '{self.root}' 

1529 cacheable: 

1530 metric0: true 

1531 """ 

1532 cache_manager = self._make_cache_manager(config_str) 

1533 

1534 # Look inside to check we do have a cache directory. 

1535 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True)) 

1536 

1537 self.assertCache(cache_manager) 

1538 

1539 # Test that the cache directory is not marked temporary 

1540 self.assertFalse(cache_manager.cache_directory.isTemporary) 

1541 

1542 def assertCache(self, cache_manager: DatastoreCacheManager) -> None: 

1543 self.assertTrue(cache_manager.should_be_cached(self.refs[0])) 

1544 self.assertFalse(cache_manager.should_be_cached(self.refs[1])) 

1545 

1546 uri = cache_manager.move_to_cache(self.files[0], self.refs[0]) 

1547 self.assertIsInstance(uri, ResourcePath) 

1548 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1])) 

1549 

1550 # Check presence in cache using ref and then using file extension. 

1551 self.assertFalse(cache_manager.known_to_cache(self.refs[1])) 

1552 self.assertTrue(cache_manager.known_to_cache(self.refs[0])) 

1553 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension())) 

1554 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension())) 

1555 

1556 # Cached file should no longer exist but uncached file should be 

1557 # unaffected. 

1558 self.assertFalse(self.files[0].exists()) 

1559 self.assertTrue(self.files[1].exists()) 

1560 

1561 # Should find this file and it should be within the cache directory. 

1562 with cache_manager.find_in_cache(self.refs[0], ".txt") as found: 

1563 self.assertTrue(found.exists()) 

1564 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory)) 

1565 

1566 # Should not be able to find these in cache 

1567 with cache_manager.find_in_cache(self.refs[0], ".fits") as found: 

1568 self.assertIsNone(found) 

1569 with cache_manager.find_in_cache(self.refs[1], ".fits") as found: 

1570 self.assertIsNone(found) 

1571 
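    # The lifecycle assertCache() walks through, as a compact sketch
    # (manager, uri, and ref are hypothetical stand-ins):
    #
    #     if manager.should_be_cached(ref):
    #         cached = manager.move_to_cache(uri, ref)  # consumes the source file
    #         assert manager.known_to_cache(ref)
    #         with manager.find_in_cache(ref, ".txt") as found:
    #             data = found.read()  # file guaranteed present in scope
    #
    # move_to_cache() returns None for refs that should not be cached.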

1572 def testNoCache(self) -> None: 

1573 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe) 

1574 for uri, ref in zip(self.files, self.refs): 

1575 self.assertFalse(cache_manager.should_be_cached(ref)) 

1576 self.assertIsNone(cache_manager.move_to_cache(uri, ref)) 

1577 self.assertFalse(cache_manager.known_to_cache(ref)) 

1578 with cache_manager.find_in_cache(ref, ".txt") as found: 

1579 self.assertIsNone(found, msg=f"{cache_manager}") 

1580 

1581 def _expiration_config(self, mode: str, threshold: int) -> str: 

1582 return f""" 

1583cached: 

1584 default: true 

1585 expiry: 

1586 mode: {mode} 

1587 threshold: {threshold} 

1588 cacheable: 

1589 unused: true 

1590 """ 

1591 
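    # For example, _expiration_config("files", 2) renders to:
    #
    #     cached:
    #       default: true
    #       expiry:
    #         mode: files
    #         threshold: 2
    #       cacheable:
    #         unused: true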

1592 def testCacheExpiryFiles(self) -> None: 

1593 threshold = 2 # Keep at least 2 files. 

1594 mode = "files" 

1595 config_str = self._expiration_config(mode, threshold) 

1596 

1597 cache_manager = self._make_cache_manager(config_str) 

1598 

1599 # Check that an empty cache reports unknown for an arbitrary ref. 

1600 self.assertFalse(cache_manager.known_to_cache(self.refs[0])) 

1601 

1602 # Should end with datasets: 2, 3, 4 

1603 self.assertExpiration(cache_manager, 5, threshold + 1) 

1604 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1605 

1606 # Check that we will not expire a file that is actively in use. 

1607 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1608 self.assertIsNotNone(found) 

1609 

1610 # Trigger cache expiration that should remove the file 

1611 # we just retrieved. Should now have: 3, 4, 5 

1612 cached = cache_manager.move_to_cache(self.files[5], self.refs[5]) 

1613 self.assertIsNotNone(cached) 

1614 

1615 # The cache should still report the same file count. 

1616 self.assertEqual(cache_manager.file_count, threshold + 1) 

1617 

1618 # Add additional entry to cache. 

1619 # Should now have 4, 5, 6 

1620 cached = cache_manager.move_to_cache(self.files[6], self.refs[6]) 

1621 self.assertIsNotNone(cached) 

1622 

1623 # Is the file still there? 

1624 self.assertTrue(found.exists()) 

1625 

1626 # Can we read it? 

1627 data = found.read() 

1628 self.assertGreater(len(data), 0) 

1629 

1630 # Outside context the file should no longer exist. 

1631 self.assertFalse(found.exists()) 

1632 

1633 # File count should not have changed. 

1634 self.assertEqual(cache_manager.file_count, threshold + 1) 

1635 

1636 # Dataset 2 was in the exempt directory, but because hard links 

1637 # are used it was also deleted from the main cache during the 

1638 # expiry above, so it should no longer be found. 

1639 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1640 self.assertIsNone(found) 

1641 

1642 # And the one stored after it is also gone. 

1643 with cache_manager.find_in_cache(self.refs[3], ".txt") as found: 

1644 self.assertIsNone(found) 

1645 

1646 # But dataset 4 is present. 

1647 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1648 self.assertIsNotNone(found) 

1649 

1650 # Adding a new dataset to the cache should trigger another expiry. 

1651 cache_manager.move_to_cache(self.files[7], self.refs[7]) 

1652 

1653 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1654 self.assertIsNone(found) 

1655 
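    # The in-use protection exercised above, as a standalone sketch
    # (manager, ref, new_file, and new_ref are hypothetical): a file held
    # open by a find_in_cache() context survives expiry until the context
    # exits.
    #
    #     with manager.find_in_cache(ref, ".txt") as found:
    #         manager.move_to_cache(new_file, new_ref)  # may expire `ref`
    #         assert found.exists()                     # still readable here
    #     assert not found.exists()                     # removed after exit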

1656 def testCacheExpiryDatasets(self) -> None: 

1657 threshold = 2 # Keep 2 datasets. 

1658 mode = "datasets" 

1659 config_str = self._expiration_config(mode, threshold) 

1660 

1661 cache_manager = self._make_cache_manager(config_str) 

1662 self.assertExpiration(cache_manager, 5, threshold + 1) 

1663 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1664 

1665 def testCacheExpiryDatasetsComposite(self) -> None: 

1666 threshold = 2 # Keep 2 datasets. 

1667 mode = "datasets" 

1668 config_str = self._expiration_config(mode, threshold) 

1669 

1670 cache_manager = self._make_cache_manager(config_str) 

1671 

1672 n_datasets = 3 

1673 for i in range(n_datasets): 

1674 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]): 

1675 cached = cache_manager.move_to_cache(component_file, component_ref) 

1676 self.assertIsNotNone(cached) 

1677 self.assertTrue(cache_manager.known_to_cache(component_ref)) 

1678 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef())) 

1679 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension())) 

1680 

1681 self.assertEqual(cache_manager.file_count, 6) # 2 retained datasets, each with 3 component files 

1682 

1683 # Write two new non-composite datasets; the number of files should drop. 

1684 self.assertExpiration(cache_manager, 2, 5) 

1685 

1686 def testCacheExpirySize(self) -> None: 

1687 threshold = 55 # Size in bytes; each test file is 10 bytes. 

1688 mode = "size" 

1689 config_str = self._expiration_config(mode, threshold) 

1690 

1691 cache_manager = self._make_cache_manager(config_str) 

1692 self.assertExpiration(cache_manager, 10, 6) 

1693 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1694 

1695 def assertExpiration( 

1696 self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int 

1697 ) -> None: 

1698 """Insert the datasets and then check the number retained.""" 

1699 for i in range(n_datasets): 

1700 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1701 self.assertIsNotNone(cached) 

1702 

1703 self.assertEqual(cache_manager.file_count, n_retained) 

1704 

1705 # The oldest files should no longer be in the cache. 

1706 for i in range(n_datasets): 

1707 with cache_manager.find_in_cache(self.refs[i], ".txt") as found: 

1708 if i >= n_datasets - n_retained: 

1709 self.assertIsInstance(found, ResourcePath) 

1710 else: 

1711 self.assertIsNone(found) 

1712 
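    # Worked example: assertExpiration(manager, 5, 3) inserts refs 0-4 and
    # expects refs 2, 3 and 4 to survive (i >= 5 - 3) while refs 0 and 1
    # have been expired.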

1713 def testCacheExpiryAge(self) -> None: 

1714 threshold = 1 # Expire files older than 1 second. 

1715 mode = "age" 

1716 config_str = self._expiration_config(mode, threshold) 

1717 

1718 cache_manager = self._make_cache_manager(config_str) 

1719 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1720 

1721 # Insert 2 files, then sleep, then insert 4 more. 

1722 for i in range(2): 

1723 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1724 self.assertIsNotNone(cached) 

1725 time.sleep(2.0) 

1726 for j in range(4): 

1727 i = 2 + j # Continue counting from the first batch. 

1728 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1729 self.assertIsNotNone(cached) 

1730 

1731 # Only the files written after the sleep should exist. 

1732 self.assertEqual(cache_manager.file_count, 4) 

1733 with cache_manager.find_in_cache(self.refs[1], ".txt") as found: 

1734 self.assertIsNone(found) 

1735 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1736 self.assertIsInstance(found, ResourcePath) 
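        # Timeline sketch for this test (threshold = 1 second):
        #   t=0s  refs 0-1 cached
        #   t=2s  refs 2-5 cached; files older than the threshold (refs 0-1)
        #         become eligible for expiry, leaving 4 files in the cache.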

1737 

1738 

1739class DatasetRefURIsTestCase(unittest.TestCase): 

1740 """Tests for DatasetRefURIs.""" 

1741 

1742 def testSequenceAccess(self) -> None: 

1743 """Verify that DatasetRefURIs can be treated like a two-item tuple.""" 

1744 uris = DatasetRefURIs() 

1745 

1746 self.assertEqual(len(uris), 2) 

1747 self.assertIsNone(uris[0]) 

1748 self.assertEqual(uris[1], {}) 

1749 

1750 primaryURI = ResourcePath("1/2/3") 

1751 componentURI = ResourcePath("a/b/c") 

1752 

1753 # Affirm that DatasetRefURIs does not support MutableSequence item assignment. 

1754 with self.assertRaises(TypeError): 

1755 uris[0] = primaryURI 

1756 with self.assertRaises(TypeError): 

1757 uris[1] = {"foo": componentURI} 

1758 

1759 # but DatasetRefURIs can be set by property name: 

1760 uris.primaryURI = primaryURI 

1761 uris.componentURIs = {"foo": componentURI} 

1762 self.assertEqual(uris.primaryURI, primaryURI) 

1763 self.assertEqual(uris[0], primaryURI) 

1764 

1765 primary, components = uris 

1766 self.assertEqual(primary, primaryURI) 

1767 self.assertEqual(components, {"foo": componentURI}) 

1768 
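    # Usage sketch: DatasetRefURIs behaves like a read-only
    # (primaryURI, componentURIs) pair, so callers can unpack it directly:
    #
    #     uris = DatasetRefURIs(primaryURI, {"foo": componentURI})
    #     primary, components = uris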

1769 def testRepr(self) -> None: 

1770 """Verify __repr__ output.""" 

1771 uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")}) 

1772 self.assertEqual( 

1773 repr(uris), 

1774 'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})', 

1775 ) 

1776 

1777 

1778class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase): 

1779 """Test the StoredFileInfo class.""" 

1780 

1781 storageClassFactory = StorageClassFactory() 

1782 

1783 def test_StoredFileInfo(self) -> None: 

1784 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1785 ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}) 

1786 

1787 record = dict( 

1788 storage_class="StructuredDataDict", 

1789 formatter="lsst.daf.butler.Formatter", 

1790 path="a/b/c.txt", 

1791 component="component", 

1792 dataset_id=ref.id, 

1793 checksum=None, 

1794 file_size=5, 

1795 ) 

1796 info = StoredFileInfo.from_record(record) 

1797 

1798 self.assertEqual(info.dataset_id, ref.id) 

1799 self.assertEqual(info.to_record(), record) 

1800 

1801 ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}) 

1802 rebased = info.rebase(ref2) 

1803 self.assertEqual(rebased.dataset_id, ref2.id) 

1804 self.assertEqual(rebased.rebase(ref), info) 

1805 

1806 with self.assertRaises(TypeError): 

1807 rebased.update(formatter=42) 

1808 

1809 with self.assertRaises(ValueError): 

1810 rebased.update(something=42, new="42") 

1811 

1812 # Check that pickle works on StoredFileInfo. 

1813 pickled_info = pickle.dumps(info) 

1814 unpickled_info = pickle.loads(pickled_info) 

1815 self.assertEqual(unpickled_info, info) 

1816 
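    # Round-trip sketch of the pattern tested above: from_record() and
    # to_record() are inverses, and rebase() swaps in another ref's
    # dataset_id while leaving the rest of the record untouched.
    #
    #     info = StoredFileInfo.from_record(record)
    #     assert info.to_record() == record
    #     assert info.rebase(other_ref).dataset_id == other_ref.id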

1817 

1818if __name__ == "__main__": 

1819 unittest.main()