Coverage for tests/test_datastore.py: 11%

1009 statements  


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable
from typing import Any, cast

# Imported explicitly so that FileDatastore can be reached by attribute path
# when it is patched in testCanNotDeterminePutFormatterLocation below.
import lsst.daf.butler.datastores.fileDatastore
import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    StorageClass,
    StorageClassFactory,
    StoredFileInfo,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
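
# ``makeExampleMetrics`` builds a ``MetricsExample`` whose three components
# (``summary``, ``output`` and ``data``) are read back individually in the
# component tests below via ``ref.makeComponentRef(...)``.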


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing"""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False
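
    # Concrete subclasses additionally define ``configFile``, ``uriScheme``,
    # ``ingestTransferModes`` and ``canIngestNoTransferAuto`` (see e.g.
    # ``PosixDatastoreTestCase`` and ``InMemoryDatastoreTestCase`` below);
    # the assertions in this class rely on those attributes.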

    def testConfigRoot(self) -> None:
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self) -> None:
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self) -> None:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V", "band": "v"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that registry knows nothing about."""
        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry, leaving the file behind, so that
            # only trust mode will be able to find the dataset.
            datastore.removeStoredItemInfo(ref)

            # With trust disabled, things should now break.
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make new dataset ref with compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with the original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self) -> None:
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a
                # read-only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore, tuple[DatasetRef, ...]]:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
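
    # ``prepDeleteTest`` flattens the refs into the returned tuple, so callers
    # unpack it directly, e.g. ``datastore, ref = self.prepDeleteTest()`` or
    # ``datastore, *refs = self.prepDeleteTest(n_refs=10)`` (both forms are
    # used below).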

    def testRemove(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self) -> None:
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"}
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V", "band": "v"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V", "band": "v"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(
        self, func: Callable[[MetricsExample, str, DatasetRef], None], expectOutput: bool = True
    ) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)
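
    # The ``func`` argument is one of the nested ``succeed``/``fail*`` closures
    # defined inside ``testIngestNoTransfer`` and ``testIngestTransfer`` below.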

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files outside of datastore root unless
                    auto."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj: MetricsExample, path: str, ref: DatasetRef) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Cleanup the file for next time round loop
                    # since it will get the same file name in store
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # A ChainedDatastore that only includes InMemoryDatastores has to be
        # skipped as well.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self) -> None:
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works; include a non-existing
                # dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with a dict()
        constructor."""
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self) -> None:
        """Test that we write to a temporary and then rename"""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self) -> None:
        """Verify that the expected exception is raised if the FileDatastore
        can not determine the put formatter location."""
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId)

        def raiser(ref: DatasetRef) -> None:
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self) -> None:
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove and put back, but with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self) -> None:
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record and trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self) -> None:
        """Test that a failed formatter write does cleanup a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails and a formatter that fails but leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self) -> None:
        """Test constraints model. Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}

        # Write empty files suitable for ingest checks (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose a different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self) -> None:
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"}

        # Write empty files suitable for ingest checks (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose a different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # An ephemeral datastore currently means InMemory,
                            # which does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self) -> None:
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)
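
    # The ``config_str`` YAML snippets in the tests below follow the
    # ``DatastoreCacheManagerConfig`` layout exercised here: a ``cached``
    # section with a ``root`` (null makes the manager fall back to a
    # temporary directory), an optional ``default`` caching flag, and
    # per-dataset-type overrides under ``cacheable``.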

1415 
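# A minimal sketch of the configuration shape parsed here, using only keys
# exercised by the tests below ("root", "default", "cacheable", "expiry");
# indentation is illustrative:
#
#     cached:
#       root: null        # null -> a temporary directory is created on demand
#       default: false    # caching status for dataset types not listed below
#       cacheable:
#         metric0: true   # per-dataset-type override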

1416 def testNoCacheDir(self) -> None: 

1417 config_str = """ 

1418cached: 

1419 root: null 

1420 cacheable: 

1421 metric0: true 

1422 """ 

1423 cache_manager = self._make_cache_manager(config_str) 

1424 

1425 # Look inside to check we don't have a cache directory 

1426 self.assertIsNone(cache_manager._cache_directory) 

1427 

1428 self.assertCache(cache_manager) 

1429 

1430 # Test that the cache directory is marked temporary 

1431 self.assertTrue(cache_manager.cache_directory.isTemporary) 

1432 

1433 def testNoCacheDirReversed(self) -> None: 

1434 """Use default caching status and metric1 to false""" 

1435 config_str = """ 

1436cached: 

1437 root: null 

1438 default: true 

1439 cacheable: 

1440 metric1: false 

1441 """ 

1442 cache_manager = self._make_cache_manager(config_str) 

1443 

1444 self.assertCache(cache_manager) 

1445 

1446 def testEnvvarCacheDir(self) -> None: 

1447 config_str = f""" 

1448cached: 

1449 root: '{self.root}' 

1450 cacheable: 

1451 metric0: true 

1452 """ 

1453 

1454 root = ResourcePath(self.root, forceDirectory=True) 

1455 env_dir = root.join("somewhere", forceDirectory=True) 

1456 elsewhere = root.join("elsewhere", forceDirectory=True) 

1457 

1458 # Environment variable should override the config value. 

1459 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}): 

1460 cache_manager = self._make_cache_manager(config_str) 

1461 self.assertEqual(cache_manager.cache_directory, env_dir) 

1462 

1463 # This environment variable should not override the config value. 

1464 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1465 cache_manager = self._make_cache_manager(config_str) 

1466 self.assertEqual(cache_manager.cache_directory, root) 

1467 

1468 # Now a config with no cache root set.

1469 config_str = """ 

1470cached: 

1471 root: null 

1472 default: true 

1473 cacheable: 

1474 metric1: false 

1475 """ 

1476 cache_manager = self._make_cache_manager(config_str) 

1477 

1478 # This environment variable should override the config value. 

1479 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1480 cache_manager = self._make_cache_manager(config_str) 

1481 self.assertEqual(cache_manager.cache_directory, env_dir) 

1482 

1483 # If both environment variables are set the main (not IF_UNSET) 

1484 # variable should win. 

1485 with unittest.mock.patch.dict( 

1486 os.environ, 

1487 { 

1488 "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath, 

1489 "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath, 

1490 }, 

1491 ): 

1492 cache_manager = self._make_cache_manager(config_str) 

1493 self.assertEqual(cache_manager.cache_directory, env_dir) 

1494 

1495 # Use the API to set the environment variable, making sure that the 

1496 # variable is reset on exit. 

1497 with unittest.mock.patch.dict( 

1498 os.environ, 

1499 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1500 ): 

1501 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1502 self.assertTrue(defined) 

1503 cache_manager = self._make_cache_manager(config_str) 

1504 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1505 

1506 # Now create the cache manager ahead of time and set the fallback 

1507 # later. 

1508 cache_manager = self._make_cache_manager(config_str) 

1509 self.assertIsNone(cache_manager._cache_directory) 

1510 with unittest.mock.patch.dict( 

1511 os.environ, 

1512 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1513 ): 

1514 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1515 self.assertTrue(defined) 

1516 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1517 
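# Precedence exercised above, highest first:
#   1. DAF_BUTLER_CACHE_DIRECTORY always wins.
#   2. An explicit "root" in the cache configuration is used next.
#   3. DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET applies only when no root is set;
#      set_fallback_cache_directory_if_unset() populates it programmatically.
#   4. Otherwise a temporary directory is created on first use.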

1518 def testExplicitCacheDir(self) -> None: 

1519 config_str = f""" 

1520cached: 

1521 root: '{self.root}' 

1522 cacheable: 

1523 metric0: true 

1524 """ 

1525 cache_manager = self._make_cache_manager(config_str) 

1526 

1527 # Look inside to check we do have a cache directory. 

1528 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True)) 

1529 

1530 self.assertCache(cache_manager) 

1531 

1532 # Test that the cache directory is not marked temporary 

1533 self.assertFalse(cache_manager.cache_directory.isTemporary) 

1534 

1535 def assertCache(self, cache_manager: DatastoreCacheManager) -> None: 

1536 self.assertTrue(cache_manager.should_be_cached(self.refs[0])) 

1537 self.assertFalse(cache_manager.should_be_cached(self.refs[1])) 

1538 

1539 uri = cache_manager.move_to_cache(self.files[0], self.refs[0]) 

1540 self.assertIsInstance(uri, ResourcePath) 

1541 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1])) 

1542 

1543 # Check presence in cache using ref and then using file extension. 

1544 self.assertFalse(cache_manager.known_to_cache(self.refs[1])) 

1545 self.assertTrue(cache_manager.known_to_cache(self.refs[0])) 

1546 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension())) 

1547 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension())) 

1548 

1549 # Cached file should no longer exist but uncached file should be 

1550 # unaffected. 

1551 self.assertFalse(self.files[0].exists()) 

1552 self.assertTrue(self.files[1].exists()) 

1553 

1554 # Should find this file and it should be within the cache directory. 

1555 with cache_manager.find_in_cache(self.refs[0], ".txt") as found: 

1556 self.assertTrue(found.exists()) 

1557 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory)) 

1558 

1559 # Should not be able to find these in cache 

1560 with cache_manager.find_in_cache(self.refs[0], ".fits") as found: 

1561 self.assertIsNone(found) 

1562 with cache_manager.find_in_cache(self.refs[1], ".fits") as found: 

1563 self.assertIsNone(found) 

1564 
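# Note that find_in_cache is a context manager: while the context is held the
# cached file is protected from expiry (testCacheExpiryFiles below depends on
# this), and outside the context the file may be removed at any time.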

1565 def testNoCache(self) -> None: 

1566 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe) 

1567 for uri, ref in zip(self.files, self.refs): 

1568 self.assertFalse(cache_manager.should_be_cached(ref)) 

1569 self.assertIsNone(cache_manager.move_to_cache(uri, ref)) 

1570 self.assertFalse(cache_manager.known_to_cache(ref)) 

1571 with cache_manager.find_in_cache(ref, ".txt") as found: 

1572 self.assertIsNone(found, msg=f"{cache_manager}") 

1573 
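# DatastoreDisabledCacheManager implements the same interface as a no-op:
# nothing is cacheable, move_to_cache returns None and leaves the file where
# it is, and lookups never find anything.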

1574 def _expiration_config(self, mode: str, threshold: int) -> str: 

1575 return f""" 

1576cached: 

1577 default: true 

1578 expiry: 

1579 mode: {mode} 

1580 threshold: {threshold} 

1581 cacheable: 

1582 unused: true 

1583 """ 

1584 
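# For example, _expiration_config("files", 2) renders to (indentation
# illustrative):
#
#     cached:
#       default: true
#       expiry:
#         mode: files
#         threshold: 2
#       cacheable:
#         unused: true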

1585 def testCacheExpiryFiles(self) -> None: 

1586 threshold = 2 # Keep at least 2 files. 

1587 mode = "files" 

1588 config_str = self._expiration_config(mode, threshold) 

1589 

1590 cache_manager = self._make_cache_manager(config_str) 

1591 

1592 # Check that an empty cache returns unknown for arbitrary ref 

1593 self.assertFalse(cache_manager.known_to_cache(self.refs[0])) 

1594 

1595 # Should end with datasets: 2, 3, 4 

1596 self.assertExpiration(cache_manager, 5, threshold + 1) 

1597 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1598 

1599 # Check that we will not expire a file that is actively in use. 

1600 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1601 self.assertIsNotNone(found) 

1602 

1603 # Trigger cache expiration that should remove the file 

1604 # we just retrieved. Should now have: 3, 4, 5 

1605 cached = cache_manager.move_to_cache(self.files[5], self.refs[5]) 

1606 self.assertIsNotNone(cached) 

1607 

1608 # Cache should still report the standard file count. 

1609 self.assertEqual(cache_manager.file_count, threshold + 1) 

1610 

1611 # Add additional entry to cache. 

1612 # Should now have 4, 5, 6 

1613 cached = cache_manager.move_to_cache(self.files[6], self.refs[6]) 

1614 self.assertIsNotNone(cached) 

1615 

1616 # Is the file still there? 

1617 self.assertTrue(found.exists()) 

1618 

1619 # Can we read it? 

1620 data = found.read() 

1621 self.assertGreater(len(data), 0) 

1622 

1623 # Outside context the file should no longer exist. 

1624 self.assertFalse(found.exists()) 

1625 

1626 # File count should not have changed. 

1627 self.assertEqual(cache_manager.file_count, threshold + 1) 

1628 

1629 # Dataset 2 was in the exempt directory but because hardlinks 

1630 # are used it was deleted from the main cache during cache expiry 

1631 # above and so should no longer be found. 

1632 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1633 self.assertIsNone(found) 

1634 

1635 # And the one stored after it is also gone. 

1636 with cache_manager.find_in_cache(self.refs[3], ".txt") as found: 

1637 self.assertIsNone(found) 

1638 

1639 # But dataset 4 is present. 

1640 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1641 self.assertIsNotNone(found) 

1642 

1643 # Adding a new dataset to the cache should now delete the unused exempt copy.

1644 cache_manager.move_to_cache(self.files[7], self.refs[7]) 

1645 

1646 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1647 self.assertIsNone(found) 

1648 
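# In "files" mode the threshold is the minimum number of files retained: each
# insertion beyond the threshold keeps the newest `threshold` files plus the
# file just added, hence the `threshold + 1` counts asserted above.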

1649 def testCacheExpiryDatasets(self) -> None: 

1650 threshold = 2 # Keep 2 datasets. 

1651 mode = "datasets" 

1652 config_str = self._expiration_config(mode, threshold) 

1653 

1654 cache_manager = self._make_cache_manager(config_str) 

1655 self.assertExpiration(cache_manager, 5, threshold + 1) 

1656 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1657 

1658 def testCacheExpiryDatasetsComposite(self) -> None: 

1659 threshold = 2 # Keep 2 datasets. 

1660 mode = "datasets" 

1661 config_str = self._expiration_config(mode, threshold) 

1662 

1663 cache_manager = self._make_cache_manager(config_str) 

1664 

1665 n_datasets = 3 

1666 for i in range(n_datasets): 

1667 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]): 

1668 cached = cache_manager.move_to_cache(component_file, component_ref) 

1669 self.assertIsNotNone(cached) 

1670 self.assertTrue(cache_manager.known_to_cache(component_ref)) 

1671 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef())) 

1672 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension())) 

1673 

1674 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files 

1675 

1676 # Write two new non-composite datasets; the number of files should drop.

1677 self.assertExpiration(cache_manager, 2, 5) 

1678 

1679 def testCacheExpirySize(self) -> None: 

1680 threshold = 55 # Each file is 10 bytes 

1681 mode = "size" 

1682 config_str = self._expiration_config(mode, threshold) 

1683 

1684 cache_manager = self._make_cache_manager(config_str) 

1685 self.assertExpiration(cache_manager, 10, 6) 

1686 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1687 
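# Worked numbers for the "size" mode test above: each test file is 10 bytes,
# so a 55-byte threshold holds 5 whole files; with the file that triggered
# expiry also retained, inserting 10 files leaves 6 in the cache.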

1688 def assertExpiration( 

1689 self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int 

1690 ) -> None: 

1691 """Insert the datasets and then check the number retained.""" 

1692 for i in range(n_datasets): 

1693 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1694 self.assertIsNotNone(cached) 

1695 

1696 self.assertEqual(cache_manager.file_count, n_retained) 

1697 

1698 # The oldest files should no longer be in the cache.

1699 for i in range(n_datasets): 

1700 with cache_manager.find_in_cache(self.refs[i], ".txt") as found: 

1701 if i >= n_datasets - n_retained: 

1702 self.assertIsInstance(found, ResourcePath) 

1703 else: 

1704 self.assertIsNone(found) 

1705 

1706 def testCacheExpiryAge(self) -> None: 

1707 threshold = 1  # Expire files older than 1 second.

1708 mode = "age" 

1709 config_str = self._expiration_config(mode, threshold) 

1710 

1711 cache_manager = self._make_cache_manager(config_str) 

1712 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1713 

1714 # Insert 2 files, then sleep, then insert more.

1715 for i in range(2): 

1716 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1717 self.assertIsNotNone(cached) 

1718 time.sleep(2.0) 

1719 for j in range(4): 

1720 i = 2 + j  # Continue numbering from the first loop.

1721 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1722 self.assertIsNotNone(cached) 

1723 

1724 # Only the files written after the sleep should exist. 

1725 self.assertEqual(cache_manager.file_count, 4) 

1726 with cache_manager.find_in_cache(self.refs[1], ".txt") as found: 

1727 self.assertIsNone(found) 

1728 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1729 self.assertIsInstance(found, ResourcePath) 

1730 

1731 
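# Between them the expiry tests cover the four modes accepted in the "expiry"
# section: "files" (file count), "datasets" (dataset count, with a composite's
# components counting as one dataset), "size" (total bytes), and "age"
# (seconds since the file entered the cache).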

1732class DatasetRefURIsTestCase(unittest.TestCase): 

1733 """Tests for DatasetRefURIs.""" 

1734 

1735 def testSequenceAccess(self) -> None: 

1736 """Verify that DatasetRefURIs can be treated like a two-item tuple.""" 

1737 uris = DatasetRefURIs() 

1738 

1739 self.assertEqual(len(uris), 2) 

1740 self.assertEqual(uris[0], None) 

1741 self.assertEqual(uris[1], {}) 

1742 

1743 primaryURI = ResourcePath("1/2/3") 

1744 componentURI = ResourcePath("a/b/c") 

1745 

1746 # Affirm that DatasetRefURIs does not support MutableSequence item assignment.

1747 with self.assertRaises(TypeError): 

1748 uris[0] = primaryURI 

1749 with self.assertRaises(TypeError): 

1750 uris[1] = {"foo": componentURI} 

1751 

1752 # but DatasetRefURIs can be set by property name: 

1753 uris.primaryURI = primaryURI 

1754 uris.componentURIs = {"foo": componentURI} 

1755 self.assertEqual(uris.primaryURI, primaryURI) 

1756 self.assertEqual(uris[0], primaryURI) 

1757 

1758 primary, components = uris 

1759 self.assertEqual(primary, primaryURI) 

1760 self.assertEqual(components, {"foo": componentURI}) 

1761 
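# A hedged usage sketch: because DatasetRefURIs behaves like a 2-tuple it can
# be unpacked directly at a call site (getURIs stands in for any API that
# returns a DatasetRefURIs):
#
#     primary, components = datastore.getURIs(ref)
#     if primary is not None:
#         print(primary.geturl())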

1762 def testRepr(self) -> None: 

1763 """Verify __repr__ output.""" 

1764 uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")}) 

1765 self.assertEqual( 

1766 repr(uris), 

1767 'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})', 

1768 ) 

1769 

1770 

1771class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase): 

1772 storageClassFactory = StorageClassFactory() 

1773 

1774 def test_StoredFileInfo(self) -> None: 

1775 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1776 ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}) 

1777 

1778 record = dict( 

1779 storage_class="StructuredDataDict", 

1780 formatter="lsst.daf.butler.Formatter", 

1781 path="a/b/c.txt", 

1782 component="component", 

1783 dataset_id=ref.id, 

1784 checksum=None, 

1785 file_size=5, 

1786 ) 

1787 info = StoredFileInfo.from_record(record) 

1788 

1789 self.assertEqual(info.dataset_id, ref.id) 

1790 self.assertEqual(info.to_record(), record) 

1791 

1792 ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}) 

1793 rebased = info.rebase(ref2) 

1794 self.assertEqual(rebased.dataset_id, ref2.id) 

1795 self.assertEqual(rebased.rebase(ref), info) 

1796 

1797 with self.assertRaises(TypeError): 

1798 rebased.update(formatter=42) 

1799 

1800 with self.assertRaises(ValueError): 

1801 rebased.update(something=42, new="42") 

1802 

1803 # Check that pickle works on StoredFileInfo. 

1804 pickled_info = pickle.dumps(info) 

1805 unpickled_info = pickle.loads(pickled_info) 

1806 self.assertEqual(unpickled_info, info) 

1807 
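# The round-trip contract asserted above: to_record() inverts from_record(),
# rebase() changes only the dataset_id (rebasing back restores equality), and
# update() rejects both wrongly-typed and unknown fields.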

1808 

1809if __name__ == "__main__": 

1810 unittest.main()