Coverage for python/lsst/daf/butler/datastores/inMemoryDatastore.py: 0%

212 statements  

coverage.py v7.13.5, created at 2026-04-30 08:41 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""In-memory datastore."""

from __future__ import annotations

__all__ = ("InMemoryDatastore", "StoredMemoryItemInfo")

import logging
import time
from collections.abc import Collection, Iterable, Mapping
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
from urllib.parse import urlencode

from lsst.daf.butler import DatasetId, DatasetRef, StorageClass
from lsst.daf.butler._exceptions import DatasetTypeNotSupportedError
from lsst.daf.butler.datastore import DatasetRefURIs, DatastoreConfig
from lsst.daf.butler.datastore.generic_base import GenericBaseDatastore, post_process_get
from lsst.daf.butler.datastore.record_data import DatastoreRecordData
from lsst.daf.butler.datastore.stored_file_info import StoredDatastoreItemInfo
from lsst.daf.butler.utils import transactional
from lsst.resources import ResourcePath, ResourcePathExpression

if TYPE_CHECKING:
    from lsst.daf.butler import Config, DatasetProvenance, DatasetType, LookupKey
    from lsst.daf.butler.datastore import DatastoreOpaqueTable
    from lsst.daf.butler.datastores.file_datastore.retrieve_artifacts import ArtifactIndexInfo
    from lsst.daf.butler.registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager

log = logging.getLogger(__name__)


@dataclass(frozen=True, slots=True)
class StoredMemoryItemInfo(StoredDatastoreItemInfo):
    """Internal InMemoryDatastore metadata associated with a stored
    DatasetRef.
    """

    timestamp: float
    """Unix timestamp indicating the time the dataset was stored."""

    storageClass: StorageClass
    """StorageClass associated with the dataset."""

    parentID: DatasetId
    """ID of the parent `DatasetRef` if this entry is a concrete
    composite. Not used if the dataset being stored is not a
    virtual component of a composite.
    """


class InMemoryDatastore(GenericBaseDatastore[StoredMemoryItemInfo]):
    """Basic Datastore for writing to an in-memory cache.

    This datastore is ephemeral in that the contents of the datastore
    disappear when the Python process completes. This also means that
    other processes cannot access this datastore.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.

    Notes
    -----
    InMemoryDatastore does not support any file-based ingest.
    """

    defaultConfigFile = "datastores/inMemoryDatastore.yaml"
    """Path to configuration defaults. Accessed within the ``configs`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    isEphemeral = True
    """A new datastore is created every time and datasets disappear when
    the process shuts down."""

    datasets: dict[DatasetId, Any]
    """Internal storage of datasets indexed by dataset ID."""

    records: dict[DatasetId, StoredMemoryItemInfo]
    """Internal records about stored datasets."""

    def __init__(
        self,
        config: DatastoreConfig,
        bridgeManager: DatastoreRegistryBridgeManager,
    ):
        super().__init__(config, bridgeManager)

        # Name ourselves with the timestamp the datastore
        # was created.
        self.name = f"{type(self).__name__}@{time.time()}"
        log.debug("Creating datastore %s", self.name)

        # Storage of datasets, keyed by dataset_id
        self.datasets: dict[DatasetId, Any] = {}

        # Records are kept distinct in order to track concrete composite
        # components, where multiple components are registered for a
        # single dataset.
        self.records: dict[DatasetId, StoredMemoryItemInfo] = {}

        # Related records that share the same parent
        self.related: dict[DatasetId, set[DatasetId]] = {}

        self._trashedIds: set[DatasetId] = set()
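
    # Illustrative state after a single put() of a non-component dataset
    # whose (hypothetical) ID is ``u``:
    #
    #     self.datasets == {u: <stored python object>}
    #     self.records == {u: StoredMemoryItemInfo(..., parentID=u)}
    #     self.related == {u: {u}}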

    @classmethod
    def _create_from_config(
        cls,
        config: DatastoreConfig,
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: ResourcePathExpression | None,
    ) -> InMemoryDatastore:
        return InMemoryDatastore(config, bridgeManager)

    def clone(self, bridgeManager: DatastoreRegistryBridgeManager) -> InMemoryDatastore:
        clone = InMemoryDatastore(self.config, bridgeManager)
        # Sharing these objects is not thread-safe, but this class is only used
        # in single-threaded test code.
        clone.datasets = self.datasets
        clone.records = self.records
        clone.related = self.related
        clone._trashedIds = self._trashedIds
        return clone

    @classmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set any filesystem-dependent config options for this Datastore to
        be appropriate for a new empty repository with the given root.

        Does nothing in this implementation.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        return

    def _get_stored_item_info(self, dataset_id: DatasetId) -> StoredMemoryItemInfo:
        # Docstring inherited from GenericBaseDatastore.
        return self.records[dataset_id]

    def _remove_stored_item_info(self, dataset_id: DatasetId) -> None:
        # Docstring inherited from GenericBaseDatastore.
        # If a component has been removed previously then we can sometimes
        # be asked to remove it again. Other datastores ignore this
        # so also ignore here.
        if dataset_id not in self.records:
            return
        record = self.records[dataset_id]
        del self.records[dataset_id]
        self.related[record.parentID].remove(dataset_id)

    def removeStoredItemInfo(self, ref: DatasetIdRef) -> None:
        """Remove information about the file associated with this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that has been removed.

        Notes
        -----
        This method is not actually used by this implementation, but some
        tests check that it works, so it is kept for now.
        """
        self._remove_stored_item_info(ref.id)

    def _get_dataset_info(self, dataset_id: DatasetId) -> tuple[DatasetId, StoredMemoryItemInfo]:
        """Check that the dataset is present and return the real ID and
        associated information.

        Parameters
        ----------
        dataset_id : `DatasetId`
            Identifier of the target dataset.

        Returns
        -------
        realID : `DatasetId`
            The dataset ID that should be used. This could be either the
            supplied ID or the ID of the parent composite.
        storageInfo : `StoredMemoryItemInfo`
            Associated storage information.

        Raises
        ------
        FileNotFoundError
            Raised if the dataset is not present in this datastore.
        """
        try:
            storedItemInfo = self._get_stored_item_info(dataset_id)
        except KeyError:
            raise FileNotFoundError(f"No such file dataset in memory: {dataset_id}") from None
        realID = dataset_id
        if storedItemInfo.parentID is not None:
            realID = storedItemInfo.parentID

        if realID not in self.datasets:
            raise FileNotFoundError(f"No such file dataset in memory: {dataset_id}")

        return realID, storedItemInfo
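
    # Sketch of the parent-ID redirect above (IDs are hypothetical): a
    # component registered with StoredMemoryItemInfo(..., parentID=c),
    # where c is the composite's ID, resolves as
    #
    #     realID, info = self._get_dataset_info(component_id)
    #     assert realID == c  # the composite holds the actual object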

    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to the datastore.

        This datastore does not distinguish dataset existence from knowledge
        of a dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        return self.exists(ref)

    def exists(self, ref: DatasetRef) -> bool:
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        try:
            self._get_dataset_info(ref.id)
        except FileNotFoundError:
            return False
        return True

    def get(
        self,
        ref: DatasetRef,
        parameters: Mapping[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        """Load an InMemoryDataset from the store.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`
            `StorageClass`-specific parameters that specify, for example,
            a slice of the dataset to be loaded.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        inMemoryDataset : `object`
            Requested dataset or slice thereof as an InMemoryDataset.

        Raises
        ------
        FileNotFoundError
            Requested dataset cannot be retrieved.
        TypeError
            Return value from formatter has unexpected type.
        ValueError
            Formatter failed to process the dataset.
        """
        log.debug("Retrieve %s from %s with parameters %s", ref, self.name, parameters)

        realID, storedItemInfo = self._get_dataset_info(ref.id)

        # We have a write storage class and a read storage class and they
        # can be different for concrete composites or if overridden.
        if storageClass is not None:
            ref = ref.overrideStorageClass(storageClass)
        refStorageClass = ref.datasetType.storageClass
        writeStorageClass = storedItemInfo.storageClass

        component = ref.datasetType.component()

        # Check that the supplied parameters are suitable for the type read.
        # If this is a derived component we validate against the composite.
        isDerivedComponent = False
        if component in writeStorageClass.derivedComponents:
            writeStorageClass.validateParameters(parameters)
            isDerivedComponent = True
        else:
            refStorageClass.validateParameters(parameters)

        inMemoryDataset = self.datasets[realID]

        # If this is a derived component we need to apply parameters
        # before we retrieve the component. We assume that the parameters
        # will affect the data globally, before the derived component
        # is selected.
        if isDerivedComponent:
            inMemoryDataset = writeStorageClass.delegate().handleParameters(inMemoryDataset, parameters)
            # Then disable parameters for later
            parameters = {}

        # Check if we have a component.
        if component:
            # In-memory datastore must have stored the dataset as a single
            # object in the write storage class. We therefore use that
            # storage class delegate to obtain the component.
            inMemoryDataset = writeStorageClass.delegate().getComponent(inMemoryDataset, component)

        # Since there is no formatter to process parameters, they all must be
        # passed to the assembler.
        inMemoryDataset = post_process_get(
            inMemoryDataset, refStorageClass, parameters, isComponent=component is not None
        )

        # Last minute type conversion.
        return refStorageClass.coerce_type(inMemoryDataset)
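
    # Minimal usage sketch (illustrative; ``store`` and ``ref`` are assumed
    # to be a configured InMemoryDatastore and a resolved DatasetRef whose
    # storage class defines a "summary" component and supports the
    # conversions shown):
    #
    #     full = store.get(ref)
    #     summary = store.get(ref.makeComponentRef("summary"))
    #     as_dict = store.get(ref, storageClass="StructuredDataDict")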

    def put(self, inMemoryDataset: Any, ref: DatasetRef, provenance: DatasetProvenance | None = None) -> None:
        """Write an InMemoryDataset with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.
        provenance : `DatasetProvenance` or `None`, optional
            Any provenance that should be attached to the serialized dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        DatasetTypeNotSupportedError
            The associated `DatasetType` is not handled by this datastore.

        Notes
        -----
        If the datastore is configured to reject certain dataset types it
        is possible that the put will fail and raise a
        `DatasetTypeNotSupportedError`. The main use case for this is to
        allow `ChainedDatastore` to put to multiple datastores without
        requiring that every datastore accepts the dataset.
        """
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(
                f"Dataset {ref} has been rejected by this datastore via configuration."
            )

        # May need to coerce the in-memory dataset to the correct
        # python type, otherwise parameters may not work.
        try:
            delegate = ref.datasetType.storageClass.delegate()
        except TypeError:
            # TypeError is raised when a storage class doesn't have a delegate.
            delegate = None
        if not delegate or not delegate.can_accept(inMemoryDataset):
            inMemoryDataset = ref.datasetType.storageClass.coerce_type(inMemoryDataset)

        # Update provenance.
        if delegate:
            inMemoryDataset = delegate.add_provenance(inMemoryDataset, ref, provenance=provenance)

        self.datasets[ref.id] = inMemoryDataset
        log.debug("Store %s in %s", ref, self.name)

        # Store the time we received this content, to allow us to optionally
        # expire it. Instead of storing a filename here, we include the
        # ID of this datasetRef so we can find it from components.
        itemInfo = StoredMemoryItemInfo(time.time(), ref.datasetType.storageClass, parentID=ref.id)

        # We have to register this content with registry.
        # Currently this assumes we have a file so we need to use stub entries.
        self.records[ref.id] = itemInfo
        self.related.setdefault(itemInfo.parentID, set()).add(ref.id)

        if self._transaction is not None:
            self._transaction.registerUndo("put", self.remove, ref)
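
    # Round-trip sketch (illustrative; ``store`` and ``ref`` are assumed to
    # be a configured InMemoryDatastore and a resolved DatasetRef whose
    # storage class accepts a plain dict):
    #
    #     store.put({"a": 1}, ref)
    #     assert store.exists(ref)
    #     assert store.get(ref) == {"a": 1}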

    def put_new(self, in_memory_dataset: Any, ref: DatasetRef) -> Mapping[str, DatasetRef]:
        # It is OK to call put() here because registry is not populating
        # bridges as we return an empty dict from this method.
        self.put(in_memory_dataset, ref)
        # As an ephemeral datastore we return an empty dict.
        return {}

    def getURIs(self, ref: DatasetRef, predict: bool = False) -> DatasetRefURIs:
        """Return URIs associated with dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, controls whether
            it should return a predicted URI or not.

        Returns
        -------
        uris : `DatasetRefURIs`
            The URI to the primary artifact associated with this dataset (if
            the dataset was disassembled within the datastore this may be
            `None`), and the URIs to any components associated with the
            dataset artifact (can be empty if there are no components).

        Notes
        -----
        The URIs returned for in-memory datastores are not usable but
        provide an indication of the associated dataset.
        """
        # Include the dataId as a URI query
        query = urlencode(ref.dataId.required)

        # If this has never been written then we have to guess.
        if not self.exists(ref):
            if not predict:
                raise FileNotFoundError(f"Dataset {ref} not in this datastore")
            name = f"{ref.datasetType.name}"
            fragment = "#predicted"
        else:
            realID, _ = self._get_dataset_info(ref.id)
            name = f"{id(self.datasets[realID])}_{ref.id}"
            fragment = ""

        return DatasetRefURIs(ResourcePath(f"mem://{name}?{query}{fragment}"), {})
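
    # Example of the resulting URI shape (values are illustrative): the
    # "name" is the id() of the stored object plus the dataset UUID, so the
    # URI identifies the dataset but cannot actually be retrieved:
    #
    #     mem://140234567890_6f6b9c6a-...?instrument=HSC&visit=903334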

    def getURI(self, ref: DatasetRef, predict: bool = False) -> ResourcePath:
        """URI to the Dataset.

        Always uses "mem://" URI prefix.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the dataset within the datastore. If the
            dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        AssertionError
            Raised if an internal error occurs.
        """
        primary, _ = self.getURIs(ref, predict)
        if primary is None:
            # This should be impossible since this datastore does
            # not disassemble. This check also helps mypy.
            raise AssertionError(f"Unexpectedly got no URI for in-memory datastore for {ref}")
        return primary

    def ingest_zip(self, zip_path: ResourcePath, transfer: str | None, *, dry_run: bool = False) -> None:
        raise NotImplementedError("Can only ingest a Zip into a file datastore.")

    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePath,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool | None = False,
        write_index: bool = True,
        add_prefix: bool = False,
    ) -> dict[ResourcePath, ArtifactIndexInfo]:
        """Retrieve the file artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `lsst.resources.ResourcePath`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the options
            supported by `lsst.resources.ResourcePath.transfer_from`.
            "move" is not allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.
        write_index : `bool`, optional
            If `True` write a file at the top level containing a serialization
            of a `ZipIndex` for the downloaded datasets.
        add_prefix : `bool`, optional
            If `True` and if ``preserve_path`` is `False`, apply a prefix to
            the filenames corresponding to some part of the dataset ref ID.
            This can be used to guarantee uniqueness.

        Notes
        -----
        Not implemented by this datastore.
        """
        # Could conceivably launch a FileDatastore to use formatters to write
        # the data but this is fraught with problems.
        raise NotImplementedError("Can not write artifacts to disk from in-memory datastore.")

    def forget(self, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited.
        refs = list(refs)
        for ref in refs:
            self._remove_stored_item_info(ref.id)

    @transactional
    def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = False) -> None:
        """Indicate to the Datastore that a dataset can be removed.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference to the required Dataset(s).
        ignore_errors : `bool`, optional
            Indicate that errors should be ignored.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist. Only relevant
            if a single dataset ref is given.

        Notes
        -----
        Concurrency should not normally be an issue for the in-memory
        datastore since all internal changes are isolated to solely this
        process and the registry only changes rows associated with this
        process.
        """
        if isinstance(ref, DatasetRef):
            # Check that this dataset is known to datastore
            try:
                self._get_dataset_info(ref.id)
            except Exception as e:
                if ignore_errors:
                    log.warning(
                        "Error encountered moving dataset %s to trash in datastore %s: %s", ref, self.name, e
                    )
                else:
                    raise
            log.debug("Trash %s in datastore %s", ref, self.name)
            ref_list = [ref]
        else:
            ref_list = list(ref)
            log.debug("Bulk trashing of datasets in datastore %s", self.name)

        def _rollbackMoveToTrash(refs: Iterable[DatasetIdRef]) -> None:
            for ref in refs:
                self._trashedIds.remove(ref.id)

        assert self._transaction is not None, "Must be in transaction"
        with self._transaction.undoWith(f"Trash {len(ref_list)} datasets", _rollbackMoveToTrash, ref_list):
            self._trashedIds.update(ref.id for ref in ref_list)
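
    # Rollback sketch: trash() only records IDs, so undoing the enclosing
    # transaction simply discards those IDs again via _rollbackMoveToTrash;
    # the stored objects themselves are untouched until emptyTrash() runs.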

    def emptyTrash(
        self, ignore_errors: bool = False, refs: Collection[DatasetRef] | None = None, dry_run: bool = False
    ) -> set[ResourcePath]:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Ignore errors.
        refs : `~collections.abc.Collection` [ `DatasetRef` ] or `None`
            Explicit list of datasets that can be removed from trash. If
            listed datasets are not already stored in the trash table they
            will be ignored. If `None` every entry in the trash table will
            be processed.
        dry_run : `bool`, optional
            If `True`, the trash table will be queried and results reported
            but no artifacts will be removed.

        Returns
        -------
        removed : `set` [ `lsst.resources.ResourcePath` ]
            List of artifacts that were removed. Empty for this datastore.

        Notes
        -----
        The internal tracking of datasets is affected by this method and
        transaction handling is not supported if there is a problem before
        the datasets themselves are deleted.

        Concurrency should not normally be an issue for the in-memory
        datastore since all internal changes are isolated to solely this
        process and the registry only changes rows associated with this
        process.
        """
        log.debug("Emptying trash in datastore %s", self.name)

        trashed_ids = self._trashedIds
        if refs:
            # Restrict the trash to the explicitly requested datasets.
            selected_ids = {ref.id for ref in refs}
            trashed_ids = {tid for tid in trashed_ids if tid in selected_ids}

        if dry_run:
            log.info(
                "Would attempt to remove %s dataset%s.", len(trashed_ids), "s" if len(trashed_ids) != 1 else ""
            )
            return set()

        for dataset_id in trashed_ids:
            try:
                realID, _ = self._get_dataset_info(dataset_id)
            except FileNotFoundError:
                # Dataset already removed so ignore it.
                continue
            except Exception as e:
                if ignore_errors:
                    log.warning(
                        "Emptying trash in datastore %s but encountered an error with dataset %s: %s",
                        self.name,
                        dataset_id,
                        e,
                    )
                    continue
                else:
                    raise

            # Determine whether all references to this dataset have been
            # removed and we can delete the dataset itself.
            allRefs = self.related[realID]
            remainingRefs = allRefs - {dataset_id}
            if not remainingRefs:
                log.debug("Removing artifact %s from datastore %s", realID, self.name)
                del self.datasets[realID]

            # Remove this entry.
            self._remove_stored_item_info(dataset_id)

        # Empty the trash table.
        self._trashedIds = self._trashedIds - trashed_ids
        return set()
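
    # Two-phase deletion sketch (illustrative; trash() must run inside a
    # datastore transaction because it registers an undo action):
    #
    #     with store.transaction():
    #         store.trash(ref)      # ref.id is recorded in _trashedIds
    #     assert store.exists(ref)  # the object itself is still present
    #     store.emptyTrash()        # bookkeeping and object are removed
    #     assert not store.exists(ref)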

    def validateConfiguration(
        self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False
    ) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : `~collections.abc.Iterable` [`DatasetRef` | `DatasetType`\
                | `StorageClass`]
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.
            All the problems are reported in a single exception.

        Notes
        -----
        This method is a no-op.
        """
        return

    def _overrideTransferMode(self, *datasets: Any, transfer: str | None = None) -> str | None:
        # Docstring is inherited from base class.
        return transfer

    def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None:
        # Docstring is inherited from base class.
        return

    def getLookupKeys(self) -> set[LookupKey]:
        # Docstring is inherited from base class.
        return self.constraints.getLookupKeys()

    def needs_expanded_data_ids(
        self,
        transfer: str | None,
        entity: DatasetRef | DatasetType | StorageClass | None = None,
    ) -> bool:
        # Docstring inherited.
        return False

    def import_records(self, data: Mapping[str, DatastoreRecordData]) -> None:
        # Docstring inherited from the base class.
        return

    def export_records(self, refs: Iterable[DatasetIdRef]) -> Mapping[str, DatastoreRecordData]:
        # Docstring inherited from the base class.

        # In-memory datastore records cannot be exported or imported.
        return {}

    def export_predicted_records(self, refs: Iterable[DatasetIdRef]) -> dict[str, DatastoreRecordData]:
        # Docstring inherited from the base class.

        # In-memory datastore records cannot be exported or imported.
        return {}

    def get_opaque_table_definitions(self) -> Mapping[str, DatastoreOpaqueTable]:
        # Docstring inherited from the base class.
        return {}